HanaeRateau committed · 9f5ff14
1 parent: b9a8339

Adds textbox for LLM's answer
app.py
CHANGED
@@ -27,7 +27,6 @@ llmpipe = pipeline(
     "text-generation",
     model="google/gemma-2-2b-it",
     model_kwargs={"torch_dtype": torch.bfloat16},
-    stream=True,
     device=device
 )
 
@@ -123,7 +122,7 @@ def speech_to_speech(audioMic, audioFile):
     print(f'[speech_to_speech] Transcribed text {translated_text}')
     print(f'[speech_to_speech] LLM answer {answer}')
 
-    return (22050, synthesised_speech), answer
+    return (22050, synthesised_speech), translated_text, answer
 
 with gr.Blocks() as demo:
     options = gr.WaveformOptions(sample_rate=22050)
@@ -146,7 +145,7 @@ with gr.Blocks() as demo:
         with gr.Tab("Record Audio"):
             audioMic = gr.Audio(sources="microphone", waveform_options=options, type="filepath")
         with gr.Tab("Upload Audio"):
-            audioFile = gr.Audio(sources="upload",
+            audioFile = gr.Audio(sources="upload", type="filepath")
 
         transcribeBtn = gr.Button("Submit", size='lg')
 
@@ -175,13 +174,14 @@ with gr.Blocks() as demo:
         with gr.Tab("Record Audio"):
             audioMic = gr.Audio(sources="microphone", waveform_options=options, type="filepath")
         with gr.Tab("Upload Audio"):
-            audioFile = gr.Audio(sources="upload",
+            audioFile = gr.Audio(sources="upload", type="filepath")
 
         translateBtn = gr.Button("Submit", size='lg')
         with gr.Column(scale=1):
             textOutput = gr.Textbox(label="Transcribed text")
+            textAnswer = gr.Textbox(label="Assistant's Answer")
             audioOutput = gr.Audio(waveform_options=options, type="numpy")
 
-    translateBtn.click(fn=speech_to_speech, inputs=[audioMic, audioFile], outputs=[audioOutput, textOutput], api_name="report_generation")
+    translateBtn.click(fn=speech_to_speech, inputs=[audioMic, audioFile], outputs=[audioOutput, textOutput, textAnswer], api_name="report_generation")
 
 demo.launch()
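
The second and fourth hunks carry the substance of the commit: speech_to_speech now returns the transcribed text as a second value, and a new textAnswer textbox receives the LLM's answer. As a minimal sketch of how the three return values line up with the click handler's outputs list: the helpers transcribe_and_translate, generate_answer, and synthesise are hypothetical stand-ins for the parts of the function this diff does not show, and only the return line comes from the commit.

# Hypothetical skeleton of speech_to_speech; only the final return statement
# appears in this commit, the helpers are illustrative placeholders.
def speech_to_speech(audioMic, audioFile):
    audio_path = audioMic if audioMic is not None else audioFile
    translated_text = transcribe_and_translate(audio_path)  # ASR + translation (not shown)
    answer = generate_answer(translated_text)               # llmpipe call (not shown)
    synthesised_speech = synthesise(answer)                 # TTS waveform (not shown)
    # Gradio maps return values to output components positionally:
    #   (22050, synthesised_speech) -> audioOutput (sample rate, numpy array)
    #   translated_text             -> textOutput  ("Transcribed text")
    #   answer                      -> textAnswer  ("Assistant's Answer")
    return (22050, synthesised_speech), translated_text, answer

Because Gradio matches return values to outputs by position, the order of outputs=[audioOutput, textOutput, textAnswer] must mirror the return order.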
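
The first hunk drops stream=True from the pipeline constructor; transformers pipelines take no such flag, and the unrecognised keyword would likely be forwarded to generate(), which rejects unknown arguments. If token streaming is wanted later, the usual transformers route is a streamer object passed per call. A hedged sketch under that assumption (the prompt and max_new_tokens here are illustrative, not from the Space):

import torch
from transformers import TextStreamer, pipeline

# app.py defines device earlier; reproduced here so the sketch is self-contained.
device = "cuda" if torch.cuda.is_available() else "cpu"

llmpipe = pipeline(
    "text-generation",
    model="google/gemma-2-2b-it",
    model_kwargs={"torch_dtype": torch.bfloat16},
    device=device,
)

# Generate kwargs such as streamer are forwarded to model.generate();
# TextStreamer prints tokens to stdout as they are produced.
streamer = TextStreamer(llmpipe.tokenizer, skip_prompt=True)
llmpipe("Summarise speech-to-speech translation in one sentence.",
        max_new_tokens=64, streamer=streamer)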