Update app.py
app.py CHANGED
@@ -12,6 +12,7 @@ from huggingface_hub import hf_hub_download, list_repo_files
 from pydub import AudioSegment
 import io
 import tempfile
+import pydub
 
 #tts cpu model
 tts_model_str = "en_us_hifi_jets_cpu.addon"
@@ -64,6 +65,20 @@ def combine_audio_files(audio_files):
         os.remove(audio_file) # Remove temporary files
     return combined
 
+def postprocess(prediction_value):
+    if isinstance(prediction_value, pydub.AudioSegment):
+        # Convert AudioSegment to numpy array
+        samples = np.array(prediction_value.get_array_of_samples())
+        # If stereo, take the mean of both channels
+        if prediction_value.channels == 2:
+            samples = np.mean(samples.reshape(-1, 2), axis=1)
+        # Return as tuple (sample_rate, samples)
+        return (prediction_value.frame_rate, samples.astype(np.float32))
+    else:
+        raise ValueError(f"Cannot process {type(prediction_value)} as Audio")
+
+
+
 #guardrail model
 guard_llm = "llama-3.1-8b-instant"
 
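The postprocess() helper added here bridges pydub and Gradio: combine_audio_files() returns a pydub.AudioSegment, while gr.Audio renders a (sample_rate, numpy_array) tuple. A minimal sketch of the same conversion in isolation; the silent segment, its duration, and the sample rate are illustrative stand-ins, not values from this Space:

import numpy as np
import pydub

# Stand-in for the combined TTS output: 500 ms of 16-bit mono silence
# at 22.05 kHz (all values chosen purely for illustration).
segment = pydub.AudioSegment.silent(duration=500, frame_rate=22050)

# The same steps postprocess() performs:
samples = np.array(segment.get_array_of_samples())
if segment.channels == 2:  # silent() is mono, so this branch is skipped
    samples = np.mean(samples.reshape(-1, 2), axis=1)
result = (segment.frame_rate, samples.astype(np.float32))

print(result[0], result[1].shape)  # 22050 (11025,)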
@@ -123,9 +138,7 @@ async def greet(product,description):
         audio_file = text_to_speech(a_list[0])
         audio_files.append(audio_file)
         final_audio = combine_audio_files(audio_files)
-
-        final_audio.export(output_file, format="mp3")
-        yield final_audio
+        yield postprocess(final_audio)
     else:
         audio_files = []
         output = llm.create_chat_completion(
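This change is likely the point of the commit: the old code exported the mp3 but then yielded the raw AudioSegment object, which gr.Audio cannot serialize. Yielding the postprocessed (sample_rate, samples) tuple instead hands Gradio a type it accepts natively, with no file written to disk. A sketch of the pattern under that assumption (the tone generator is invented for illustration, not part of this Space):

import numpy as np
import gradio as gr

def tone(freq):
    # A generator fn, like greet(): each yield updates the gr.Audio output.
    rate = 16000  # illustrative sample rate
    t = np.arange(rate) / rate  # one second of sample times
    samples = np.sin(2 * np.pi * float(freq) * t).astype(np.float32)
    yield (rate, samples)  # the tuple form gr.Audio accepts

demo = gr.Interface(fn=tone, inputs="number", outputs=gr.Audio())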
@@ -147,9 +160,7 @@ async def greet(product,description):
             audio_file = text_to_speech(delta.get('content', ''))
             audio_files.append(audio_file)
         final_audio = combine_audio_files(audio_files)
-
-        final_audio.export(output_file, format="mp3")
-        yield final_audio
+        yield postprocess(final_audio)
 
 demo = gr.Interface(fn=greet, inputs=["text","text"], outputs=gr.Audio(), concurrency_limit=10)
 demo.launch()
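One caveat worth flagging (an observation about the new code, not part of this commit): get_array_of_samples() on 16-bit audio returns values in [-32768, 32767], and postprocess() casts them to float32 without rescaling. Depending on the Gradio version, float samples may be assumed to lie in [-1, 1] or be peak-normalized on conversion, so an explicit scaling step keeps playback volume predictable. A sketch of that step:

import numpy as np

def to_normalized_float(samples: np.ndarray, sample_width: int = 2) -> np.ndarray:
    # Scale integer PCM values into [-1, 1]; 2 ** 15 = 32768 for 16-bit audio.
    return samples.astype(np.float32) / float(2 ** (8 * sample_width - 1))

In postprocess(), this would mean returning (prediction_value.frame_rate, to_normalized_float(samples, prediction_value.sample_width)) instead of the bare astype cast.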
|