HanaeRateau committed on
Commit
9f5ff14
1 Parent(s): b9a8339

Adds textbox for LLM's answer

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -27,7 +27,6 @@ llmpipe = pipeline(
27
  "text-generation",
28
  model="google/gemma-2-2b-it",
29
  model_kwargs={"torch_dtype": torch.bfloat16},
30
- stream=True,
31
  device=device
32
  )
33
 
@@ -123,7 +122,7 @@ def speech_to_speech(audioMic, audioFile):
123
  print(f'[speech_to_speech] Transcribed text {translated_text}')
124
  print(f'[speech_to_speech] LLM answer {answer}')
125
 
126
- return (22050, synthesised_speech), answer
127
 
128
  with gr.Blocks() as demo:
129
  options = gr.WaveformOptions(sample_rate=22050)
@@ -146,7 +145,7 @@ with gr.Blocks() as demo:
146
  with gr.Tab("Record Audio"):
147
  audioMic = gr.Audio(sources="microphone", waveform_options=options, type="filepath")
148
  with gr.Tab("Upload Audio"):
149
- audioFile = gr.Audio(sources="upload", waveform_options=gr.WaveformOptions(sample_rate=16000), type="filepath")
150
 
151
  transcribeBtn = gr.Button("Submit", size='lg')
152
 
@@ -175,13 +174,14 @@ with gr.Blocks() as demo:
175
  with gr.Tab("Record Audio"):
176
  audioMic = gr.Audio(sources="microphone", waveform_options=options, type="filepath")
177
  with gr.Tab("Upload Audio"):
178
- audioFile = gr.Audio(sources="upload", waveform_options=gr.WaveformOptions(sample_rate=16000), type="filepath")
179
 
180
  translateBtn = gr.Button("Submit", size='lg')
181
  with gr.Column(scale=1):
182
  textOutput = gr.Textbox(label="Transcribed text")
 
183
  audioOutput = gr.Audio(waveform_options=options, type="numpy")
184
 
185
- translateBtn.click(fn=speech_to_speech, inputs=[audioMic, audioFile], outputs=[audioOutput, textOutput], api_name="report_generation")
186
 
187
  demo.launch()
 
27
  "text-generation",
28
  model="google/gemma-2-2b-it",
29
  model_kwargs={"torch_dtype": torch.bfloat16},
 
30
  device=device
31
  )
32
 
 
122
  print(f'[speech_to_speech] Transcribed text {translated_text}')
123
  print(f'[speech_to_speech] LLM answer {answer}')
124
 
125
+ return (22050, synthesised_speech), translated_text, answer
126
 
127
  with gr.Blocks() as demo:
128
  options = gr.WaveformOptions(sample_rate=22050)
 
145
  with gr.Tab("Record Audio"):
146
  audioMic = gr.Audio(sources="microphone", waveform_options=options, type="filepath")
147
  with gr.Tab("Upload Audio"):
148
+ audioFile = gr.Audio(sources="upload", type="filepath")
149
 
150
  transcribeBtn = gr.Button("Submit", size='lg')
151
 
 
174
  with gr.Tab("Record Audio"):
175
  audioMic = gr.Audio(sources="microphone", waveform_options=options, type="filepath")
176
  with gr.Tab("Upload Audio"):
177
+ audioFile = gr.Audio(sources="upload", type="filepath")
178
 
179
  translateBtn = gr.Button("Submit", size='lg')
180
  with gr.Column(scale=1):
181
  textOutput = gr.Textbox(label="Transcribed text")
182
+ textAnswer = gr.Textbox(label="Assistant's Answer")
183
  audioOutput = gr.Audio(waveform_options=options, type="numpy")
184
 
185
+ translateBtn.click(fn=speech_to_speech, inputs=[audioMic, audioFile], outputs=[audioOutput, textOutput, textAnswer], api_name="report_generation")
186
 
187
  demo.launch()