118 lines
3.3 KiB
Python
118 lines
3.3 KiB
Python
#!/usr/bin/env python3
|
|
#
|
|
# Orion Rigel --- Text to Speech engine
|
|
#
|
|
# Copyright (c) 2022 Sameer Rahmani <lxsameer@gnu.org>
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation, either version 2 of the License.
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
import asyncio
|
|
import numpy as np
|
|
# pylint: disable=redefined-outer-name, unused-argument
|
|
from pathlib import Path
|
|
|
|
import simpleaudio as sa
|
|
|
|
from TTS.utils.manage import ModelManager
|
|
from TTS.utils.synthesizer import Synthesizer
|
|
|
|
|
|
def to_wav_data(wav):
    """Scale a waveform to 16-bit signed integer samples.

    Every sample is multiplied by ``32767 / peak``, where ``peak`` is the
    largest absolute sample value, floored at 0.01 so that (near-)silent
    input is not amplified by a division by a tiny number.

    Args:
        wav: Sequence or array of numeric samples.

    Returns:
        A ``numpy.int16`` array with the same shape as ``wav``. Empty input
        yields an empty ``int16`` array.
    """
    samples = np.asarray(wav)
    if samples.size == 0:
        # np.max() raises ValueError on an empty array; there is nothing to
        # scale, so hand back an empty buffer of the target dtype.
        return samples.astype(np.int16)

    peak = max(0.01, np.max(np.abs(samples)))
    return (samples * (32767 / peak)).astype(np.int16)
|
|
|
|
|
|
def play(wav, *, sample_rate=22050):
    """Play a waveform through simpleaudio and block until it finishes.

    The waveform is converted with ``to_wav_data`` and played as a single
    channel with 2 bytes per sample.

    Args:
        wav: Waveform samples accepted by ``to_wav_data``.
        sample_rate: Playback sample rate in Hz passed to
            ``sa.play_buffer``. Defaults to 22050.

    Raises:
        Whatever ``to_wav_data`` or ``sa.play_buffer`` raise; such errors
        propagate unchanged.
    """
    # Start playback *before* entering the try block: if conversion or
    # play_buffer fails there is no play object to stop, and the cleanup must
    # not mask the real error with an UnboundLocalError.
    playback = sa.play_buffer(to_wav_data(wav), 1, 2, sample_rate)
    try:
        # Wait for audio playback to finish before returning.
        playback.wait_done()
    finally:
        # Also reached when the wait is interrupted (e.g. KeyboardInterrupt),
        # so the audio does not keep playing in the background.
        playback.stop()
|
|
|
|
|
|
def synth(config):
    """Load the TTS models and build the TCP connection handler.

    Args:
        config: Mapping of settings. ``MODEL_NAME`` selects the TTS model
            (default ``tts_models/en/ljspeech/tacotron2-DDC``) and
            ``SPEAKER_IDX`` is forwarded to the synthesizer on every request
            (default ``None``).

    Returns:
        A coroutine function ``tcp_handler(reader, writer)`` suitable as the
        client-connected callback of ``asyncio.start_server``.
    """
    path = Path(__file__).parent / ".models.json"
    manager = ModelManager(path)

    model_name = config.get("MODEL_NAME", "tts_models/en/ljspeech/tacotron2-DDC")
    speaker_idx = config.get("SPEAKER_IDX")

    # Optional components that are not used by this service.
    language_ids_file_path = None
    encoder_path = None
    encoder_config_path = None

    model_path, config_path, model_item = manager.download_model(model_name)

    # Only download a vocoder when the model entry names one; otherwise the
    # vocoder arguments stay None instead of crashing on a missing/None name.
    # NOTE(review): assumes model_item is a dict-like model entry - confirm.
    vocoder_path = None
    vocoder_config_path = None
    vocoder_name = model_item.get("default_vocoder")
    if vocoder_name is not None:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)

    # load models
    # NOTE(review): arguments are positional; the third (None) and the last
    # (False) are presumably the speakers file and the CUDA flag - verify
    # against the installed TTS version.
    synthesizer = Synthesizer(
        model_path,
        config_path,
        None,
        language_ids_file_path,
        vocoder_path,
        vocoder_config_path,
        encoder_path,
        encoder_config_path,
        False,
    )

    async def tcp_handler(reader, writer):
        """Speak the text sent by one client, then acknowledge with b"Ok"."""
        try:
            # Read till EOF: the client has to finish sending (close its
            # write side) before synthesis starts.
            data = await reader.read(-1)
            message = data.decode()

            # Both calls below are made without await, so they run on the
            # event loop thread and other connections wait until they return.
            wav = synthesizer.tts(message, speaker_idx, "None", None)
            play(wav)

            print(f"Received {message!r}")

            writer.write(b"Ok")
            await writer.drain()
        finally:
            # Always release the connection, including when decoding,
            # synthesis or playback fails.
            writer.close()
            await writer.wait_closed()

    return tcp_handler
|
|
|
|
|
|
async def start(config, fn):
    """Run a TCP server that dispatches every connection to ``fn``.

    Args:
        config: Mapping of settings. ``host`` (default ``127.0.0.1``) and
            ``port`` (default ``6666``) select the listening address.
        fn: Client-connected callback handed to ``asyncio.start_server``.

    The bound socket addresses are printed once, then the coroutine serves
    until ``serve_forever`` stops.
    """
    server = await asyncio.start_server(
        fn,
        config.get('host', '127.0.0.1'),
        config.get('port', 6666),
    )

    # Report every address the server actually bound to.
    bound = [str(sock.getsockname()) for sock in server.sockets]
    listening = ', '.join(bound)
    print(f'Serving on {listening}')

    # The context manager closes the server when serving ends.
    async with server:
        await server.serve_forever()
|