Spaces:
Sleeping
Sleeping
fix tts
Browse files
- .env_template +1 -0
- .gitignore +3 -1
- poetry.lock +51 -1
- pyproject.toml +1 -0
- tts/tts.py +52 -35
.env_template
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
TYPHOON_CHAT_API = *
|
|
|
|
| 2 |
DEBUG_MODE = false
|
| 3 |
OPENTHAIGPT_CHAT_API = *
|
| 4 |
BOTNOI_API_TOKEN = *
|
|
|
|
| 1 |
TYPHOON_CHAT_API = *
|
| 2 |
+
GROQ_CHAT_KEY = *
|
| 3 |
DEBUG_MODE = false
|
| 4 |
OPENTHAIGPT_CHAT_API = *
|
| 5 |
BOTNOI_API_TOKEN = *
|
.gitignore
CHANGED
|
@@ -8,4 +8,6 @@ __pycache__
|
|
| 8 |
|
| 9 |
# Files
|
| 10 |
speedtest.py
|
| 11 |
-
*.csv
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
# Files
|
| 10 |
speedtest.py
|
| 11 |
+
*.csv
|
| 12 |
+
*.wav
|
| 13 |
+
*.mp3
|
poetry.lock
CHANGED
|
@@ -1994,6 +1994,56 @@ files = [
|
|
| 1994 |
[package.extras]
|
| 1995 |
windows-terminal = ["colorama (>=0.4.6)"]
|
| 1996 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1997 |
[[package]]
|
| 1998 |
name = "python-dateutil"
|
| 1999 |
version = "2.9.0.post0"
|
|
@@ -2820,4 +2870,4 @@ propcache = ">=0.2.0"
|
|
| 2820 |
[metadata]
|
| 2821 |
lock-version = "2.0"
|
| 2822 |
python-versions = "^3.10"
|
| 2823 |
-
content-hash = "
|
|
|
|
| 1994 |
[package.extras]
|
| 1995 |
windows-terminal = ["colorama (>=0.4.6)"]
|
| 1996 |
|
| 1997 |
+
[[package]]
|
| 1998 |
+
name = "pythainlp"
|
| 1999 |
+
version = "5.0.4"
|
| 2000 |
+
description = "Thai Natural Language Processing library"
|
| 2001 |
+
optional = false
|
| 2002 |
+
python-versions = ">=3.7"
|
| 2003 |
+
files = [
|
| 2004 |
+
{file = "pythainlp-5.0.4-py3-none-any.whl", hash = "sha256:5f036d558f673215c142c6e32fd38b111d674d94d64f1b03b409fdfed0fa8dcd"},
|
| 2005 |
+
{file = "pythainlp-5.0.4.tar.gz", hash = "sha256:2cd8e088d722617c6065225fffbaf2522bc20b8a3eff5bd2bcb251c40eccdce0"},
|
| 2006 |
+
]
|
| 2007 |
+
|
| 2008 |
+
[package.dependencies]
|
| 2009 |
+
requests = ">=2.22.0"
|
| 2010 |
+
tzdata = {version = "*", markers = "sys_platform == \"win32\""}
|
| 2011 |
+
|
| 2012 |
+
[package.extras]
|
| 2013 |
+
abbreviation = ["khamyo (>=0.2.0)"]
|
| 2014 |
+
attacut = ["attacut (>=1.0.6)"]
|
| 2015 |
+
benchmarks = ["PyYAML (>=5.3.1)", "numpy (>=1.22)", "pandas (>=0.24)"]
|
| 2016 |
+
coreference-resolution = ["fastcoref (>=2.1.5)", "spacy (>=3.0)"]
|
| 2017 |
+
dependency-parsing = ["spacy-thai (>=0.7.1)", "transformers (>=4.22.1)", "ufal.chu-liu-edmonds (>=1.0.2)"]
|
| 2018 |
+
el = ["multiel (>=0.5)"]
|
| 2019 |
+
esupar = ["esupar (>=1.3.8)", "numpy", "transformers (>=4.22.1)"]
|
| 2020 |
+
full = ["PyYAML (>=5.3.1)", "attacut (>=1.0.4)", "bpemb (>=0.3.2)", "emoji (>=0.5.1)", "epitran (>=1.1)", "fairseq (>=0.10.0)", "fastai (<2.0)", "fastcoref (>=2.1.5)", "gensim (>=4.0.0)", "khamyo (>=0.2.0)", "nlpo3 (>=1.2.2)", "nltk (>=3.3)", "numpy (>=1.22)", "onnxruntime (>=1.10.0)", "oskut (>=1.3)", "pandas (>=0.24)", "panphon (>=0.20.0)", "phunspell (>=0.1.6)", "pyicu (>=2.3)", "sacremoses (>=0.0.41)", "sefr-cut (>=1.1)", "sentence-transformers (>=2.2.2)", "sentencepiece (>=0.1.91)", "spacy (>=3.0)", "spacy-thai (>=0.7.1)", "spylls (>=0.1.5)", "ssg (>=0.0.8)", "symspellpy (>=6.7.6)", "thai-nner", "torch (>=1.0.0)", "transformers (>=4.22.1)", "ufal.chu-liu-edmonds (>=1.0.2)", "wtpsplit (>=1.0.1)", "wunsen (>=0.0.3)"]
|
| 2021 |
+
generate = ["fastai (<2.0)"]
|
| 2022 |
+
icu = ["pyicu (>=2.3)"]
|
| 2023 |
+
ipa = ["epitran (>=1.1)"]
|
| 2024 |
+
ml = ["numpy (>=1.22)", "torch (>=1.0.0)"]
|
| 2025 |
+
mt5 = ["sentencepiece (>=0.1.91)", "transformers (>=4.6.0)"]
|
| 2026 |
+
nlpo3 = ["nlpo3 (>=1.2.2)"]
|
| 2027 |
+
onnx = ["numpy (>=1.22)", "onnxruntime (>=1.10.0)", "sentencepiece (>=0.1.91)"]
|
| 2028 |
+
oskut = ["oskut (>=1.3)"]
|
| 2029 |
+
sefr-cut = ["sefr-cut (>=1.1)"]
|
| 2030 |
+
spacy-thai = ["spacy-thai (>=0.7.1)"]
|
| 2031 |
+
spell = ["phunspell (>=0.1.6)", "spylls (>=0.1.5)", "symspellpy (>=6.7.6)"]
|
| 2032 |
+
ssg = ["ssg (>=0.0.8)"]
|
| 2033 |
+
textaugment = ["bpemb", "gensim (>=4.0.0)"]
|
| 2034 |
+
thai-nner = ["thai-nner"]
|
| 2035 |
+
thai2fit = ["emoji (>=0.5.1)", "gensim (>=4.0.0)", "numpy (>=1.22)"]
|
| 2036 |
+
thai2rom = ["numpy (>=1.22)", "torch (>=1.0.0)"]
|
| 2037 |
+
transformers-ud = ["transformers (>=4.22.1)", "ufal.chu-liu-edmonds (>=1.0.2)"]
|
| 2038 |
+
translate = ["fairseq (>=0.10.0)", "sacremoses (>=0.0.41)", "sentencepiece (>=0.1.91)", "torch (>=1.0.0)", "transformers (>=4.6.0)"]
|
| 2039 |
+
wangchanberta = ["sentencepiece (>=0.1.91)", "transformers (>=4.6.0)"]
|
| 2040 |
+
wangchanglm = ["pandas (>=0.24)", "sentencepiece (>=0.1.91)", "transformers (>=4.6.0)"]
|
| 2041 |
+
word-approximation = ["panphon (>=0.20.0)"]
|
| 2042 |
+
wordnet = ["nltk (>=3.3)"]
|
| 2043 |
+
wsd = ["sentence-transformers (>=2.2.2)"]
|
| 2044 |
+
wtp = ["transformers (>=4.6.0)", "wtpsplit (>=1.0.1)"]
|
| 2045 |
+
wunsen = ["wunsen (>=0.0.1)"]
|
| 2046 |
+
|
| 2047 |
[[package]]
|
| 2048 |
name = "python-dateutil"
|
| 2049 |
version = "2.9.0.post0"
|
|
|
|
| 2870 |
[metadata]
|
| 2871 |
lock-version = "2.0"
|
| 2872 |
python-versions = "^3.10"
|
| 2873 |
+
content-hash = "e0d24b26c313871a6ec8a9d37d495f8a11ca13efa6e2e82cfa872f77935955e9"
|
pyproject.toml
CHANGED
|
@@ -16,6 +16,7 @@ fastapi = "^0.115.5"
|
|
| 16 |
uvicorn = "^0.32.0"
|
| 17 |
gradio = "^5.8.0"
|
| 18 |
langchain-groq = "^0.2.1"
|
|
|
|
| 19 |
|
| 20 |
|
| 21 |
[build-system]
|
|
|
|
| 16 |
uvicorn = "^0.32.0"
|
| 17 |
gradio = "^5.8.0"
|
| 18 |
langchain-groq = "^0.2.1"
|
| 19 |
+
pythainlp = "^5.0.4"
|
| 20 |
|
| 21 |
|
| 22 |
[build-system]
|
tts/tts.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
from fastapi import FastAPI, HTTPException
|
| 2 |
from pythainlp.tokenize import sent_tokenize
|
| 3 |
from pydantic import BaseModel , Field
|
|
@@ -7,6 +8,8 @@ import uuid
|
|
| 7 |
from dotenv import load_dotenv
|
| 8 |
import os
|
| 9 |
|
|
|
|
|
|
|
| 10 |
# Load environment variables
|
| 11 |
load_dotenv()
|
| 12 |
|
|
@@ -62,8 +65,8 @@ def generate_voice(audio_id, text, text_delay, speaker, volume, speed, type_medi
|
|
| 62 |
def download_mp3(url, output_path):
|
| 63 |
headers = {
|
| 64 |
"Accept-Encoding": "identity;q=1, *;q=0",
|
| 65 |
-
"Range": "bytes=0-",
|
| 66 |
"Referer": "https://voice.botnoi.ai/",
|
|
|
|
| 67 |
}
|
| 68 |
|
| 69 |
response = requests.get(url, headers=headers, stream=True)
|
|
@@ -73,12 +76,15 @@ def download_mp3(url, output_path):
|
|
| 73 |
if chunk: # Filter out keep-alive chunks
|
| 74 |
file.write(chunk)
|
| 75 |
else:
|
| 76 |
-
raise HTTPException(status_code=response.status_code, detail="Failed to download MP3")
|
| 77 |
|
| 78 |
# FastAPI endpoint to generate and download voice
|
| 79 |
@app.post("/generate_voice_botnoi/")
|
| 80 |
def generate_voice_endpoint(request: VoiceRequest):
|
| 81 |
-
|
|
|
|
|
|
|
|
|
|
| 82 |
audio_url = generate_voice(
|
| 83 |
audio_id=request.audio_id,
|
| 84 |
text=request.text,
|
|
@@ -92,7 +98,10 @@ def generate_voice_endpoint(request: VoiceRequest):
|
|
| 92 |
)
|
| 93 |
|
| 94 |
# Generate unique filename for the MP3
|
| 95 |
-
|
|
|
|
|
|
|
|
|
|
| 96 |
download_mp3(audio_url, output_file)
|
| 97 |
|
| 98 |
return FileResponse(output_file, media_type="audio/mpeg", filename="output.mp3")
|
|
@@ -105,10 +114,6 @@ class Vaja9Request(BaseModel):
|
|
| 105 |
phrase_break: int = 0
|
| 106 |
audiovisual: int = 0
|
| 107 |
|
| 108 |
-
def split_text_into_chunks(text: str, chunk_size: int = 20) -> list[str]:
    """Split *text* into chunks of at most *chunk_size* words.

    Words are the whitespace-separated tokens produced by ``str.split()``;
    the words inside each chunk are re-joined with a single space, so runs
    of whitespace in the input are collapsed.

    Args:
        text: The text to split. An empty or whitespace-only string yields
            an empty list.
        chunk_size: Maximum number of words per chunk. Must be >= 1.

    Returns:
        The chunks in their original order. Only the last chunk may hold
        fewer than ``chunk_size`` words.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    # A negative step would make range() empty and silently drop the whole
    # text; a zero step makes range() fail with an unrelated-looking message.
    # Reject both up front with a clear error.
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    words = text.split()
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]
|
| 111 |
-
|
| 112 |
def generate_vaja9_voice(text: str, speaker: int, phrase_break: int, audiovisual: int):
|
| 113 |
url = 'https://api.aiforthai.in.th/vaja9/synth_audiovisual'
|
| 114 |
headers = {
|
|
@@ -136,7 +141,10 @@ def generate_vaja9_voice(text: str, speaker: int, phrase_break: int, audiovisual
|
|
| 136 |
raise HTTPException(status_code=502, detail=f"Bad Gateway - Connection error: {str(e)}")
|
| 137 |
|
| 138 |
def download_vaja9_wav(url: str, output_path: str):
|
| 139 |
-
|
|
|
|
|
|
|
|
|
|
| 140 |
try:
|
| 141 |
response = requests.get(url, headers=headers, timeout=60) # Increased timeout to 60 seconds
|
| 142 |
if response.status_code == 200:
|
|
@@ -153,34 +161,43 @@ def download_vaja9_wav(url: str, output_path: str):
|
|
| 153 |
|
| 154 |
@app.post("/generate_voice_vaja9/")
|
| 155 |
def generate_voice_vaja9_endpoint(request: Vaja9Request):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
try:
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
|
|
|
|
|
|
|
|
|
| 160 |
|
| 161 |
-
#
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
phrase_break=request.phrase_break,
|
| 167 |
-
audiovisual=request.audiovisual
|
| 168 |
-
)
|
| 169 |
-
|
| 170 |
-
# Generate unique filename for each chunk
|
| 171 |
-
output_file = f"{uuid.uuid4()}.wav"
|
| 172 |
-
download_vaja9_wav(audio_url, output_file)
|
| 173 |
-
output_files.append(output_file)
|
| 174 |
|
| 175 |
-
|
| 176 |
-
if len(output_files) == 1:
|
| 177 |
-
return FileResponse(output_files[0], media_type="audio/wav", filename="output.wav")
|
| 178 |
-
|
| 179 |
-
# TODO: If multiple chunks, they should be combined into a single audio file
|
| 180 |
-
# For now, return the first chunk
|
| 181 |
-
return FileResponse(output_files[0], media_type="audio/wav", filename="output.wav")
|
| 182 |
|
| 183 |
-
except HTTPException:
|
| 184 |
-
raise
|
| 185 |
except Exception as e:
|
| 186 |
-
raise HTTPException(status_code=502, detail=f"Bad Gateway - Unexpected error: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time
|
| 2 |
from fastapi import FastAPI, HTTPException
|
| 3 |
from pythainlp.tokenize import sent_tokenize
|
| 4 |
from pydantic import BaseModel , Field
|
|
|
|
| 8 |
from dotenv import load_dotenv
|
| 9 |
import os
|
| 10 |
|
| 11 |
+
import uvicorn
|
| 12 |
+
|
| 13 |
# Load environment variables
|
| 14 |
load_dotenv()
|
| 15 |
|
|
|
|
| 65 |
def download_mp3(url, output_path):
|
| 66 |
headers = {
|
| 67 |
"Accept-Encoding": "identity;q=1, *;q=0",
|
|
|
|
| 68 |
"Referer": "https://voice.botnoi.ai/",
|
| 69 |
+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
|
| 70 |
}
|
| 71 |
|
| 72 |
response = requests.get(url, headers=headers, stream=True)
|
|
|
|
| 76 |
if chunk: # Filter out keep-alive chunks
|
| 77 |
file.write(chunk)
|
| 78 |
else:
|
| 79 |
+
raise HTTPException(status_code=response.status_code, detail=f"{response.status_code} Failed to download MP3")
|
| 80 |
|
| 81 |
# FastAPI endpoint to generate and download voice
|
| 82 |
@app.post("/generate_voice_botnoi/")
|
| 83 |
def generate_voice_endpoint(request: VoiceRequest):
|
| 84 |
+
|
| 85 |
+
text_delay = request.text
|
| 86 |
+
# text_delay = auto_generate_text_delay_with_pythainlp(request.text)
|
| 87 |
+
print("Text delay:", text_delay)
|
| 88 |
audio_url = generate_voice(
|
| 89 |
audio_id=request.audio_id,
|
| 90 |
text=request.text,
|
|
|
|
| 98 |
)
|
| 99 |
|
| 100 |
# Generate unique filename for the MP3
|
| 101 |
+
print("botnoi url: ", audio_url)
|
| 102 |
+
output_dir = "generated_voice/botnoi"
|
| 103 |
+
os.makedirs(output_dir, exist_ok=True)
|
| 104 |
+
output_file = os.path.join(output_dir, f"{int(time.time())}.mp3")
|
| 105 |
download_mp3(audio_url, output_file)
|
| 106 |
|
| 107 |
return FileResponse(output_file, media_type="audio/mpeg", filename="output.mp3")
|
|
|
|
| 114 |
phrase_break: int = 0
|
| 115 |
audiovisual: int = 0
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
def generate_vaja9_voice(text: str, speaker: int, phrase_break: int, audiovisual: int):
|
| 118 |
url = 'https://api.aiforthai.in.th/vaja9/synth_audiovisual'
|
| 119 |
headers = {
|
|
|
|
| 141 |
raise HTTPException(status_code=502, detail=f"Bad Gateway - Connection error: {str(e)}")
|
| 142 |
|
| 143 |
def download_vaja9_wav(url: str, output_path: str):
|
| 144 |
+
api_key = os.getenv("VAJA9_API_KEY")
|
| 145 |
+
if not api_key:
|
| 146 |
+
raise HTTPException(status_code=500, detail="VAJA9_API_KEY environment variable not set")
|
| 147 |
+
headers = {'Apikey': api_key}
|
| 148 |
try:
|
| 149 |
response = requests.get(url, headers=headers, timeout=60) # Increased timeout to 60 seconds
|
| 150 |
if response.status_code == 200:
|
|
|
|
| 161 |
|
| 162 |
@app.post("/generate_voice_vaja9/")
def generate_voice_vaja9_endpoint(request: Vaja9Request):
    """
    Generate a voice file using the Vaja9 endpoint.

    Request fields:
    - text (str): text to synthesize (at most 300 characters)
    - speaker (int): voice type
        0 : male voice
        1 : female voice
        2 : boy voice
        3 : girl voice
    - phrase_break (int): pause behaviour
        0 : automatic phrase breaks
        1 : no phrase breaks
    - audiovisual (int): model type
        0 : speech synthesis model
        1 : speech and visual synthesis model

    Returns:
        A FileResponse serving the downloaded WAV file as "output.wav".

    Raises:
        HTTPException: re-raised unchanged when raised by the synthesis or
            download helpers; any other exception is reported as a 502.
    """
    try:
        audio_url = generate_vaja9_voice(
            text=request.text,
            speaker=request.speaker,
            phrase_break=request.phrase_break,
            audiovisual=request.audiovisual,
        )

        output_dir = "generated_voice/vaja9"
        os.makedirs(output_dir, exist_ok=True)

        # A timestamp alone only has one-second resolution, so two requests
        # arriving in the same second would write to the same path and one
        # caller could be served the other's audio. The uuid suffix makes the
        # name unique while the timestamp prefix keeps files sortable by time.
        file_name = f"{int(time.time())}_{uuid.uuid4().hex}.wav"
        output_file = os.path.join(output_dir, file_name)
        download_vaja9_wav(audio_url, output_file)

        return FileResponse(output_file, media_type="audio/wav", filename="output.wav")

    except HTTPException:
        # Already carries the intended status code and detail; pass it through.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=502,
            detail=f"Bad Gateway - Unexpected error: {str(e)}",
        ) from e
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
if __name__ == "__main__":
    # Start the API under uvicorn when this file is executed directly.
    uvicorn.run(
        "tts:app",
        host="0.0.0.0",
        port=8001,
        reload=True,
    )
|