Add the Rigel for TTS
This commit is contained in:
parent
6071d71c41
commit
8578d24c51
|
@ -7,4 +7,6 @@ nc.tar.xz
|
||||||
*.ogg
|
*.ogg
|
||||||
*.wav
|
*.wav
|
||||||
models
|
models
|
||||||
demo
|
demo
|
||||||
|
__pycache__/
|
||||||
|
*.pyc
|
8
builder
8
builder
|
@ -92,6 +92,7 @@ function setup() { ## Setup the working directory and make it ready for developm
|
||||||
# shellcheck source=.venv/bin/activate
|
# shellcheck source=.venv/bin/activate
|
||||||
. "$ME/.venv/bin/activate"
|
. "$ME/.venv/bin/activate"
|
||||||
info "Intalling tflite runtime..."
|
info "Intalling tflite runtime..."
|
||||||
|
pip3 install -r rigel/requirements.txt
|
||||||
pip3 install --extra-index-url https://google-coral.github.io/py-repo/ tflite_runtime
|
pip3 install --extra-index-url https://google-coral.github.io/py-repo/ tflite_runtime
|
||||||
|
|
||||||
info "Downloding STT..."
|
info "Downloding STT..."
|
||||||
|
@ -120,6 +121,13 @@ function run() { ## Setup the working directory and make it ready for developmen
|
||||||
deactivate
|
deactivate
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function rigel() { ## Run the rigel server
    # NOTE: the trailing "## ..." text on the line above is what the `help`
    # command greps for, so keep it on the same line as the function header.

    # Activate the project's virtualenv located under $ME/.venv.
    # shellcheck source=.venv/bin/activate
    . "$ME/.venv/bin/activate"

    # Execute the server script directly (so it must be executable and carry a
    # shebang), passing the project's .env file as its only argument.
    "$ME/rigel/server.py" "$ME/.env"

    # Leave the virtualenv again once the server process has returned.
    deactivate
}
|
||||||
|
|
||||||
function help() { ## Print out this help message
|
function help() { ## Print out this help message
|
||||||
echo "Commands:"
|
echo "Commands:"
|
||||||
grep -E '^function [a-zA-Z0-9_-]+\(\) \{ ## .*$$' "$0" | \
|
grep -E '^function [a-zA-Z0-9_-]+\(\) \{ ## .*$$' "$0" | \
|
||||||
|
|
|
@ -0,0 +1,377 @@
|
||||||
|
{
|
||||||
|
"tts_models": {
|
||||||
|
"multilingual":{
|
||||||
|
"multi-dataset":{
|
||||||
|
"your_tts":{
|
||||||
|
"description": "Your TTS model accompanying the paper https://arxiv.org/abs/2112.02418",
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.0_models/tts_models--multilingual--multi-dataset--your_tts.zip",
|
||||||
|
"default_vocoder": null,
|
||||||
|
"commit": "e9a1953e",
|
||||||
|
"license": "CC BY-NC-ND 4.0",
|
||||||
|
"contact": "egolge@coqui.ai"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"en": {
|
||||||
|
"ek1": {
|
||||||
|
"tacotron2": {
|
||||||
|
"description": "EK1 en-rp tacotron2 by NMStoker",
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.1.0/tts_models--en--ek1--tacotron2.zip",
|
||||||
|
"default_vocoder": "vocoder_models/en/ek1/wavegrad",
|
||||||
|
"commit": "c802255"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"ljspeech": {
|
||||||
|
"tacotron2-DDC": {
|
||||||
|
"description": "Tacotron2 with Double Decoder Consistency.",
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.12/tts_models--en--ljspeech--tacotron2-DDC.zip",
|
||||||
|
"default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2",
|
||||||
|
"commit": "bae2ad0f",
|
||||||
|
"author": "Eren Gölge @erogol",
|
||||||
|
"license": "",
|
||||||
|
"contact": "egolge@coqui.com"
|
||||||
|
},
|
||||||
|
"tacotron2-DDC_ph": {
|
||||||
|
"description": "Tacotron2 with Double Decoder Consistency with phonemes.",
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.0_models/tts_models--en--ljspeech--tacotron2-DDC_ph.zip",
|
||||||
|
"default_vocoder": "vocoder_models/en/ljspeech/univnet",
|
||||||
|
"commit": "3900448",
|
||||||
|
"author": "Eren Gölge @erogol",
|
||||||
|
"license": "",
|
||||||
|
"contact": "egolge@coqui.com"
|
||||||
|
},
|
||||||
|
"glow-tts": {
|
||||||
|
"description": "",
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.9/tts_models--en--ljspeech--glow-tts.zip",
|
||||||
|
"stats_file": null,
|
||||||
|
"default_vocoder": "vocoder_models/en/ljspeech/multiband-melgan",
|
||||||
|
"commit": "",
|
||||||
|
"author": "Eren Gölge @erogol",
|
||||||
|
"license": "MPL",
|
||||||
|
"contact": "egolge@coqui.com"
|
||||||
|
},
|
||||||
|
"speedy-speech": {
|
||||||
|
"description": "Speedy Speech model trained on LJSpeech dataset using the Alignment Network for learning the durations.",
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.3.0/tts_models--en--ljspeech--speedy_speech.zip",
|
||||||
|
"stats_file": null,
|
||||||
|
"default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2",
|
||||||
|
"commit": "4581e3d",
|
||||||
|
"author": "Eren Gölge @erogol",
|
||||||
|
"license": "TBD",
|
||||||
|
"contact": "egolge@coqui.com"
|
||||||
|
},
|
||||||
|
"tacotron2-DCA": {
|
||||||
|
"description": "",
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.9/tts_models--en--ljspeech--tacotron2-DCA.zip",
|
||||||
|
"default_vocoder": "vocoder_models/en/ljspeech/multiband-melgan",
|
||||||
|
"commit": "",
|
||||||
|
"author": "Eren Gölge @erogol",
|
||||||
|
"license": "MPL",
|
||||||
|
"contact": "egolge@coqui.com"
|
||||||
|
},
|
||||||
|
"vits": {
|
||||||
|
"description": "VITS is an End2End TTS model trained on LJSpeech dataset with phonemes.",
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.0_models/tts_models--en--ljspeech--vits.zip",
|
||||||
|
"default_vocoder": null,
|
||||||
|
"commit": "3900448",
|
||||||
|
"author": "Eren Gölge @erogol",
|
||||||
|
"license": "TBD",
|
||||||
|
"contact": "egolge@coqui.com"
|
||||||
|
},
|
||||||
|
"fast_pitch": {
|
||||||
|
"description": "FastPitch model trained on LJSpeech using the Aligner Network",
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.2.2/tts_models--en--ljspeech--fast_pitch.zip",
|
||||||
|
"default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2",
|
||||||
|
"commit": "b27b3ba",
|
||||||
|
"author": "Eren Gölge @erogol",
|
||||||
|
"license": "TBD",
|
||||||
|
"contact": "egolge@coqui.com"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"vctk": {
|
||||||
|
"vits": {
|
||||||
|
"description": "VITS End2End TTS model trained on VCTK dataset with 109 different speakers with EN accent.",
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.0_models/tts_models--en--vctk--vits.zip",
|
||||||
|
"default_vocoder": null,
|
||||||
|
"commit": "3900448",
|
||||||
|
"author": "Eren @erogol",
|
||||||
|
"license": "",
|
||||||
|
"contact": "egolge@coqui.ai"
|
||||||
|
},
|
||||||
|
"fast_pitch":{
|
||||||
|
"description": "FastPitch model trained on VCTK dataset.",
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.0_models/tts_models--en--vctk--fast_pitch.zip",
|
||||||
|
"default_vocoder": null,
|
||||||
|
"commit": "bdab788d",
|
||||||
|
"author": "Eren @erogol",
|
||||||
|
"license": "CC BY-NC-ND 4.0",
|
||||||
|
"contact": "egolge@coqui.ai"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"sam": {
|
||||||
|
"tacotron-DDC": {
|
||||||
|
"description": "Tacotron2 with Double Decoder Consistency trained with Accenture's Sam dataset.",
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.13/tts_models--en--sam--tacotron_DDC.zip",
|
||||||
|
"default_vocoder": "vocoder_models/en/sam/hifigan_v2",
|
||||||
|
"commit": "bae2ad0f",
|
||||||
|
"author": "Eren Gölge @erogol",
|
||||||
|
"license": "",
|
||||||
|
"contact": "egolge@coqui.com"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"es": {
|
||||||
|
"mai": {
|
||||||
|
"tacotron2-DDC": {
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.9/tts_models--es--mai--tacotron2-DDC.zip",
|
||||||
|
"default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan",
|
||||||
|
"commit": "",
|
||||||
|
"author": "Eren Gölge @erogol",
|
||||||
|
"license": "MPL",
|
||||||
|
"contact": "egolge@coqui.com"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"fr": {
|
||||||
|
"mai": {
|
||||||
|
"tacotron2-DDC": {
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.9/tts_models--fr--mai--tacotron2-DDC.zip",
|
||||||
|
"default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan",
|
||||||
|
"commit": "",
|
||||||
|
"author": "Eren Gölge @erogol",
|
||||||
|
"license": "MPL",
|
||||||
|
"contact": "egolge@coqui.com"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"uk":{
|
||||||
|
"mai": {
|
||||||
|
"glow-tts": {
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.0_models/tts_models--uk--mai--glow-tts.zip",
|
||||||
|
"author":"@robinhad",
|
||||||
|
"commit": "bdab788d",
|
||||||
|
"license": "MIT",
|
||||||
|
"contact": "",
|
||||||
|
"default_vocoder": "vocoder_models/uk/mai/multiband-melgan"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"zh-CN": {
|
||||||
|
"baker": {
|
||||||
|
"tacotron2-DDC-GST": {
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.0_models/tts_models--zh-CN--baker--tacotron2-DDC-GST.zip",
|
||||||
|
"commit": "unknown",
|
||||||
|
"author": "@kirianguiller",
|
||||||
|
"default_vocoder": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nl": {
|
||||||
|
"mai": {
|
||||||
|
"tacotron2-DDC": {
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.10/tts_models--nl--mai--tacotron2-DDC.zip",
|
||||||
|
"author": "@r-dh",
|
||||||
|
"default_vocoder": "vocoder_models/nl/mai/parallel-wavegan",
|
||||||
|
"stats_file": null,
|
||||||
|
"commit": "540d811"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"de": {
|
||||||
|
"thorsten": {
|
||||||
|
"tacotron2-DCA": {
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.11/tts_models--de--thorsten--tacotron2-DCA.zip",
|
||||||
|
"default_vocoder": "vocoder_models/de/thorsten/fullband-melgan",
|
||||||
|
"author": "@thorstenMueller",
|
||||||
|
"commit": "unknown"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"ja": {
|
||||||
|
"kokoro": {
|
||||||
|
"tacotron2-DDC": {
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.15/tts_models--jp--kokoro--tacotron2-DDC.zip",
|
||||||
|
"default_vocoder": "vocoder_models/ja/kokoro/hifigan_v1",
|
||||||
|
"description": "Tacotron2 with Double Decoder Consistency trained with Kokoro Speech Dataset.",
|
||||||
|
"author": "@kaiidams",
|
||||||
|
"commit": "401fbd89"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tr":{
|
||||||
|
"common-voice": {
|
||||||
|
"glow-tts":{
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.0_models/tts_models--tr--common-voice--glow-tts.zip",
|
||||||
|
"default_vocoder": "vocoder_models/tr/common-voice/hifigan",
|
||||||
|
"license": "MIT",
|
||||||
|
"description": "Turkish GlowTTS model using an unknown speaker from the Common-Voice dataset.",
|
||||||
|
"author": "Fatih Akademi",
|
||||||
|
"commit": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"it": {
|
||||||
|
"mai_female": {
|
||||||
|
"glow-tts":{
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.0_models/tts_models--it--mai_female--glow-tts.zip",
|
||||||
|
"default_vocoder": null,
|
||||||
|
"description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.",
|
||||||
|
"author": "@nicolalandro",
|
||||||
|
"commit": null
|
||||||
|
},
|
||||||
|
"vits":{
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.0_models/tts_models--it--mai_female--vits.zip",
|
||||||
|
"default_vocoder": null,
|
||||||
|
"description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.",
|
||||||
|
"author": "@nicolalandro",
|
||||||
|
"commit": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"mai_male": {
|
||||||
|
"glow-tts":{
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.0_models/tts_models--it--mai_male--glow-tts.zip",
|
||||||
|
"default_vocoder": null,
|
||||||
|
"description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.",
|
||||||
|
"author": "@nicolalandro",
|
||||||
|
"commit": null
|
||||||
|
},
|
||||||
|
"vits":{
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.0_models/tts_models--it--mai_male--vits.zip",
|
||||||
|
"default_vocoder": null,
|
||||||
|
"description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.",
|
||||||
|
"author": "@nicolalandro",
|
||||||
|
"commit": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"vocoder_models": {
|
||||||
|
"universal": {
|
||||||
|
"libri-tts": {
|
||||||
|
"wavegrad": {
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.9/vocoder_models--universal--libri-tts--wavegrad.zip",
|
||||||
|
"commit": "ea976b0",
|
||||||
|
"author": "Eren Gölge @erogol",
|
||||||
|
"license": "MPL",
|
||||||
|
"contact": "egolge@coqui.com"
|
||||||
|
},
|
||||||
|
"fullband-melgan": {
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.9/vocoder_models--universal--libri-tts--fullband-melgan.zip",
|
||||||
|
"commit": "4132240",
|
||||||
|
"author": "Eren Gölge @erogol",
|
||||||
|
"license": "MPL",
|
||||||
|
"contact": "egolge@coqui.com"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"en": {
|
||||||
|
"ek1": {
|
||||||
|
"wavegrad": {
|
||||||
|
"description": "EK1 en-rp wavegrad by NMStoker",
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.10/vocoder_models--en--ek1--wavegrad.zip",
|
||||||
|
"commit": "c802255"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"ljspeech": {
|
||||||
|
"multiband-melgan": {
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.9/vocoder_models--en--ljspeech--mulitband-melgan.zip",
|
||||||
|
"commit": "ea976b0",
|
||||||
|
"author": "Eren Gölge @erogol",
|
||||||
|
"license": "MPL",
|
||||||
|
"contact": "egolge@coqui.com"
|
||||||
|
},
|
||||||
|
"hifigan_v2": {
|
||||||
|
"description": "HiFiGAN_v2 LJSpeech vocoder from https://arxiv.org/abs/2010.05646.",
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.12/vocoder_model--en--ljspeech-hifigan_v2.zip",
|
||||||
|
"commit": "bae2ad0f",
|
||||||
|
"author": "@erogol",
|
||||||
|
"license": "",
|
||||||
|
"contact": "egolge@coqui.ai"
|
||||||
|
},
|
||||||
|
"univnet": {
|
||||||
|
"description": "UnivNet model finetuned on TacotronDDC_ph spectrograms for better compatibility.",
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.3.0/vocoder_models--en--ljspeech--univnet_v2.zip",
|
||||||
|
"commit": "4581e3d",
|
||||||
|
"author": "Eren @erogol",
|
||||||
|
"license": "TBD",
|
||||||
|
"contact": "egolge@coqui.ai"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"vctk": {
|
||||||
|
"hifigan_v2": {
|
||||||
|
"description": "Finetuned and intended to be used with tts_models/en/vctk/sc-glow-tts",
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.12/vocoder_model--en--vctk--hifigan_v2.zip",
|
||||||
|
"commit": "2f07160",
|
||||||
|
"author": "Edresson Casanova",
|
||||||
|
"license": "",
|
||||||
|
"contact": ""
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"sam": {
|
||||||
|
"hifigan_v2": {
|
||||||
|
"description": "Finetuned and intended to be used with tts_models/en/sam/tacotron_DDC",
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.13/vocoder_models--en--sam--hifigan_v2.zip",
|
||||||
|
"commit": "2f07160",
|
||||||
|
"author": "Eren Gölge @erogol",
|
||||||
|
"license": "",
|
||||||
|
"contact": "egolge@coqui.ai"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nl": {
|
||||||
|
"mai": {
|
||||||
|
"parallel-wavegan": {
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.10/vocoder_models--nl--mai--parallel-wavegan.zip",
|
||||||
|
"author": "@r-dh",
|
||||||
|
"commit": "unknown"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"de": {
|
||||||
|
"thorsten": {
|
||||||
|
"wavegrad": {
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.11/vocoder_models--de--thorsten--wavegrad.zip",
|
||||||
|
"author": "@thorstenMueller",
|
||||||
|
"commit": "unknown"
|
||||||
|
},
|
||||||
|
"fullband-melgan": {
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.1.3/vocoder_models--de--thorsten--fullband-melgan.zip",
|
||||||
|
"author": "@thorstenMueller",
|
||||||
|
"commit": "unknown"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"ja": {
|
||||||
|
"kokoro": {
|
||||||
|
"hifigan_v1": {
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.2.0/vocoder_models--ja--kokoro--hifigan_v1.zip",
|
||||||
|
"description": "HifiGAN model trained for kokoro dataset by @kaiidams",
|
||||||
|
"author": "@kaiidams",
|
||||||
|
"commit": "3900448"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"uk": {
|
||||||
|
"mai": {
|
||||||
|
"multiband-melgan": {
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.5.0_models/vocoder_models--uk--mai--multiband-melgan.zip",
|
||||||
|
"author":"@robinhad",
|
||||||
|
"commit": "bdab788d",
|
||||||
|
"license": "MIT",
|
||||||
|
"contact": ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tr":{
|
||||||
|
"common-voice": {
|
||||||
|
"hifigan":{
|
||||||
|
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.0_models/vocoder_models--tr--common-voice--hifigan.zip",
|
||||||
|
"description": "HifiGAN model using an unknown speaker from the Common-Voice dataset.",
|
||||||
|
"author": "Fatih Akademi",
|
||||||
|
"license": "MIT",
|
||||||
|
"commit": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,2 @@
|
||||||
|
tts==0.6
|
||||||
|
python-dotenv
|
|
@ -0,0 +1,29 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
#
|
||||||
|
# Orion Rigel --- Text to Speech engine
|
||||||
|
#
|
||||||
|
# Copyright (c) 2022 Sameer Rahmani <lxsameer@gnu.org>
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 2 of the License.
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
import dotenv
|
||||||
|
|
||||||
|
from tcp import start, synth
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Load the env-file configuration and run the Rigel TCP server.

    The path of the dotenv-style configuration file is taken from the first
    command-line argument. The parsed values are handed both to ``synth``
    (to build the connection handler) and to ``start`` (to bind and serve).

    Raises:
        SystemExit: with a usage message when no configuration file path
            was given on the command line.
    """
    # Fail with a readable usage line instead of an IndexError traceback.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} ENV_FILE")

    config = dotenv.dotenv_values(sys.argv[1])
    # `synth` builds the per-connection handler; `start` serves it forever.
    asyncio.run(start(config, synth(config)))
|
||||||
|
|
||||||
|
# Start the server only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|
|
@ -0,0 +1,75 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
#
|
||||||
|
# Orion Rigel --- Text to Speech engine
|
||||||
|
#
|
||||||
|
# Copyright (c) 2022 Sameer Rahmani <lxsameer@gnu.org>
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 2 of the License.
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
# pylint: disable=redefined-outer-name, unused-argument
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from TTS.utils.manage import ModelManager
|
||||||
|
from TTS.utils.synthesizer import Synthesizer
|
||||||
|
|
||||||
|
|
||||||
|
def synth(config):
    """Prepare the TTS models and return an asyncio stream connection handler.

    Args:
        config: Mapping of settings (anything with ``.get``). Keys read here:
            ``MODEL_NAME`` (defaults to
            ``"tts_models/en/ljspeech/tacotron2-DDC"``) and ``SPEAKER_IDX``
            (optional).

    Returns:
        A coroutine function ``tcp_handler(reader, writer)`` suitable for
        ``asyncio.start_server``. It reads one NUL-terminated message from
        the client, logs it, replies with ``b"Ok"`` and closes the
        connection.
    """
    # The model catalogue lives next to this module.
    path = Path(__file__).parent / ".models.json"
    manager = ModelManager(path)

    model_name = config.get("MODEL_NAME", "tts_models/en/ljspeech/tacotron2-DDC")
    language_ids_file_path = None
    vocoder_path = None
    vocoder_config_path = None
    encoder_path = None
    encoder_config_path = None

    model_path, config_path, model_item = manager.download_model(model_name)

    # Several catalogue entries (e.g. the VITS models) declare
    # "default_vocoder": null; only fetch a vocoder when one is named,
    # otherwise the vocoder paths stay None.
    vocoder_name = model_item.get("default_vocoder")
    if vocoder_name is not None:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)

    # Kept for the synthesis call that is not wired in yet (see TODO below).
    speaker_idx = config.get("SPEAKER_IDX")

    # Load models. Arguments are positional; NOTE(review): the third (None)
    # and last (False) are presumably the speakers file and the use-CUDA
    # flag -- confirm against the installed TTS Synthesizer signature.
    synthesizer = Synthesizer(
        model_path,
        config_path,
        None,
        language_ids_file_path,
        vocoder_path,
        vocoder_config_path,
        encoder_path,
        encoder_config_path,
        False,
    )

    async def tcp_handler(reader, writer):
        """Handle one client: read a NUL-terminated message and acknowledge it."""
        try:
            try:
                data = await reader.readuntil(b"\0")
            except (asyncio.IncompleteReadError, asyncio.LimitOverrunError) as err:
                # Client hung up before the terminator, or sent too much data.
                print(f"Dropping malformed request: {err!r}")
                return

            # readuntil() returns the separator too; strip the trailing NUL.
            message = data[:-1].decode()
            # TODO: wav = synthesizer.tts(message, speaker_idx, "None", None)
            print(f"Received {message!r}")

            # StreamWriter.write() only accepts bytes-like objects.
            writer.write(b"Ok")
            await writer.drain()
        finally:
            # Always release the connection, even if reading/decoding failed.
            writer.close()

    return tcp_handler
|
||||||
|
|
||||||
|
|
||||||
|
async def start(config, fn):
    """Bind a TCP server for ``fn`` and serve connections until cancelled.

    Args:
        config: Mapping of settings; ``host`` (default ``'127.0.0.1'``) and
            ``port`` (default ``6666``) select the listening address.
        fn: Client-connected callback passed to ``asyncio.start_server``.
    """
    listener = await asyncio.start_server(
        fn,
        config.get('host', '127.0.0.1'),
        config.get('port', 6666),
    )

    # Report every socket the server ended up bound to.
    bound = [str(sock.getsockname()) for sock in listener.sockets]
    print('Serving on ' + ', '.join(bound))

    async with listener:
        await listener.serve_forever()
|
Loading…
Reference in New Issue