Spaces:
Paused
Paused
ChandimaPrabath
committed on
Commit
•
fad2e64
1
Parent(s):
31c5df5
init
Browse files- .gitignore +4 -1
- app.py +46 -4
.gitignore
CHANGED
@@ -1,2 +1,5 @@
|
|
1 |
# tts-client
|
2 |
-
tts-client.py
|
|
|
|
|
|
|
|
1 |
# tts-client
|
2 |
+
tts-client.py
|
3 |
+
|
4 |
+
# test audio
|
5 |
+
output.wav
|
app.py
CHANGED
@@ -1,7 +1,8 @@
|
|
|
|
1 |
import wave
|
2 |
import numpy as np
|
3 |
from io import BytesIO
|
4 |
-
from flask import Flask, request, send_file
|
5 |
from flask_cors import CORS
|
6 |
from huggingface_hub import hf_hub_download
|
7 |
from piper import PiperVoice
|
@@ -9,13 +10,20 @@ from piper import PiperVoice
|
|
9 |
app = Flask(__name__)
|
10 |
CORS(app)
|
11 |
|
|
|
|
|
|
|
|
|
12 |
def synthesize_speech(text, sentence_silence, length_scale):
|
|
|
13 |
model_path = hf_hub_download(repo_id="csukuangfj/vits-piper-en_US-lessac-medium", filename="en_US-lessac-medium.onnx")
|
14 |
config_path = hf_hub_download(repo_id="csukuangfj/vits-piper-en_US-lessac-medium", filename="en_US-lessac-medium.onnx.json")
|
15 |
|
|
|
16 |
voice = PiperVoice.load(model_path, config_path)
|
17 |
|
18 |
buffer = BytesIO()
|
|
|
19 |
with wave.open(buffer, 'wb') as wav_file:
|
20 |
wav_file.setframerate(voice.config.sample_rate)
|
21 |
wav_file.setsampwidth(2)
|
@@ -23,22 +31,56 @@ def synthesize_speech(text, sentence_silence, length_scale):
|
|
23 |
voice.synthesize(text, wav_file, sentence_silence=sentence_silence, length_scale=length_scale)
|
24 |
|
25 |
buffer.seek(0)
|
|
|
26 |
return buffer
|
27 |
|
28 |
@app.route('/')
|
29 |
def index():
|
30 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
@app.route('/tts', methods=['POST'])
|
33 |
def tts():
|
34 |
data = request.json
|
|
|
|
|
|
|
|
|
35 |
text = data.get('text', '')
|
36 |
sentence_silence = float(data.get('sentence_silence', 0.1))
|
37 |
length_scale = float(data.get('length_scale', 1.0))
|
38 |
|
39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
return send_file(audio_buffer, mimetype="audio/wav", as_attachment=True, download_name="output.wav")
|
42 |
|
43 |
if __name__ == '__main__':
|
44 |
-
app.run(host='0.0.0.0', port=
|
|
|
1 |
+
import logging
|
2 |
import wave
|
3 |
import numpy as np
|
4 |
from io import BytesIO
|
5 |
+
from flask import Flask, request, send_file, jsonify
|
6 |
from flask_cors import CORS
|
7 |
from huggingface_hub import hf_hub_download
|
8 |
from piper import PiperVoice
|
|
|
10 |
# Flask application object; CORS is enabled for every route with the
# extension's default settings.
app = Flask(__name__)
CORS(app)

# Configure the root logger at DEBUG level so the progress messages emitted
# by this module (including logger.debug calls) are shown.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
|
16 |
+
|
17 |
def synthesize_speech(text, sentence_silence, length_scale):
|
18 |
+
logger.debug("Downloading model and config files...")
|
19 |
model_path = hf_hub_download(repo_id="csukuangfj/vits-piper-en_US-lessac-medium", filename="en_US-lessac-medium.onnx")
|
20 |
config_path = hf_hub_download(repo_id="csukuangfj/vits-piper-en_US-lessac-medium", filename="en_US-lessac-medium.onnx.json")
|
21 |
|
22 |
+
logger.debug("Loading PiperVoice model...")
|
23 |
voice = PiperVoice.load(model_path, config_path)
|
24 |
|
25 |
buffer = BytesIO()
|
26 |
+
logger.debug("Synthesizing speech...")
|
27 |
with wave.open(buffer, 'wb') as wav_file:
|
28 |
wav_file.setframerate(voice.config.sample_rate)
|
29 |
wav_file.setsampwidth(2)
|
|
|
31 |
voice.synthesize(text, wav_file, sentence_silence=sentence_silence, length_scale=length_scale)
|
32 |
|
33 |
buffer.seek(0)
|
34 |
+
logger.debug("Speech synthesis complete.")
|
35 |
return buffer
|
36 |
|
37 |
# Static landing page served at the site root. Kept as a module-level
# constant so the view function itself stays a one-liner.
_INDEX_PAGE = '''
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>TTS Server</title>
<style>
body { font-family: Arial, sans-serif; margin: 40px; }
h1 { color: #333; }
p { font-size: 1.2em; }
code { background: #f4f4f4; padding: 2px 4px; border-radius: 4px; }
</style>
</head>
<body>
<h1>TTS Server is Running</h1>
<p>Use the <code>/tts</code> endpoint to synthesize speech.</p>
<p>Send a POST request with JSON data containing the <code>text</code>, <code>sentence_silence</code>, and <code>length_scale</code> parameters.</p>
</body>
</html>
'''


@app.route('/')
def index():
    """Return the static HTML page that explains how to call ``/tts``."""
    return _INDEX_PAGE
|
60 |
|
61 |
@app.route('/tts', methods=['POST'])
def tts():
    """Synthesize speech from a JSON request and return it as a WAV download.

    Expected JSON object:
        text (str): required, the text to speak.
        sentence_silence (number): optional, defaults to 0.1.
        length_scale (number): optional, defaults to 1.0.

    Returns:
        On success, the audio produced by ``synthesize_speech`` sent as an
        ``audio/wav`` attachment named ``output.wav``.
        On invalid input, a JSON ``{"error": ...}`` body with status 400.
        On synthesis failure, a JSON ``{"error": ...}`` body with status 500.
    """
    import math  # local import: only this view needs it

    # silent=True yields None for a missing, malformed or non-JSON body, so
    # every bad request goes through the same JSON error response below
    # instead of Flask's default HTML error page.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or not payload:
        logger.error("No data received in request.")
        return jsonify({"error": "No data provided"}), 400

    text = payload.get('text', '')
    if not isinstance(text, str) or not text.strip():
        logger.error("No text provided in request.")
        return jsonify({"error": "Text parameter is required"}), 400

    # Client-supplied numbers may be strings, null, or nested structures;
    # a failed conversion is the caller's mistake (400), not a server error.
    try:
        sentence_silence = float(payload.get('sentence_silence', 0.1))
        length_scale = float(payload.get('length_scale', 1.0))
    except (TypeError, ValueError):
        logger.error("Non-numeric sentence_silence or length_scale in request.")
        return jsonify(
            {"error": "sentence_silence and length_scale must be numbers"}
        ), 400

    # float() accepts "nan" and "inf"; neither is a usable synthesis setting.
    if not (math.isfinite(sentence_silence) and math.isfinite(length_scale)):
        logger.error("Non-finite sentence_silence or length_scale in request.")
        return jsonify(
            {"error": "sentence_silence and length_scale must be finite numbers"}
        ), 400

    logger.info(
        "Received request: text=%s, sentence_silence=%s, length_scale=%s",
        text, sentence_silence, length_scale,
    )

    # Request-boundary catch-all: the full traceback goes to the log, while
    # the client only sees a generic message so internals are not leaked.
    try:
        audio_buffer = synthesize_speech(text, sentence_silence, length_scale)
    except Exception:
        logger.exception("Error during speech synthesis.")
        return jsonify({"error": "Speech synthesis failed"}), 500

    return send_file(
        audio_buffer,
        mimetype="audio/wav",
        as_attachment=True,
        download_name="output.wav",
    )
|
84 |
|
85 |
if __name__ == '__main__':
    import os

    # The Werkzeug debugger lets anyone who can reach the server execute
    # arbitrary Python, so it must not be on by default while binding to all
    # interfaces. Opt in explicitly with FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1", "true", "yes", "on",
    }
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)
|