HanaeRateau committed on
Commit
9f5ff14
1 Parent(s): b9a8339

Adds textbox for LLM's answer

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -27,7 +27,6 @@ llmpipe = pipeline(
27
  "text-generation",
28
  model="google/gemma-2-2b-it",
29
  model_kwargs={"torch_dtype": torch.bfloat16},
30
- stream=True,
31
  device=device
32
  )
33
 
@@ -123,7 +122,7 @@ def speech_to_speech(audioMic, audioFile):
123
  print(f'[speech_to_speech] Transcribed text {translated_text}')
124
  print(f'[speech_to_speech] LLM answer {answer}')
125
 
126
- return (22050, synthesised_speech), answer
127
 
128
  with gr.Blocks() as demo:
129
  options = gr.WaveformOptions(sample_rate=22050)
@@ -146,7 +145,7 @@ with gr.Blocks() as demo:
146
  with gr.Tab("Record Audio"):
147
  audioMic = gr.Audio(sources="microphone", waveform_options=options, type="filepath")
148
  with gr.Tab("Upload Audio"):
149
- audioFile = gr.Audio(sources="upload", waveform_options=gr.WaveformOptions(sample_rate=16000), type="filepath")
150
 
151
  transcribeBtn = gr.Button("Submit", size='lg')
152
 
@@ -175,13 +174,14 @@ with gr.Blocks() as demo:
175
  with gr.Tab("Record Audio"):
176
  audioMic = gr.Audio(sources="microphone", waveform_options=options, type="filepath")
177
  with gr.Tab("Upload Audio"):
178
- audioFile = gr.Audio(sources="upload", waveform_options=gr.WaveformOptions(sample_rate=16000), type="filepath")
179
 
180
  translateBtn = gr.Button("Submit", size='lg')
181
  with gr.Column(scale=1):
182
  textOutput = gr.Textbox(label="Transcribed text")
 
183
  audioOutput = gr.Audio(waveform_options=options, type="numpy")
184
 
185
- translateBtn.click(fn=speech_to_speech, inputs=[audioMic, audioFile], outputs=[audioOutput, textOutput], api_name="report_generation")
186
 
187
  demo.launch()
 
27
  "text-generation",
28
  model="google/gemma-2-2b-it",
29
  model_kwargs={"torch_dtype": torch.bfloat16},
 
30
  device=device
31
  )
32
 
 
122
  print(f'[speech_to_speech] Transcribed text {translated_text}')
123
  print(f'[speech_to_speech] LLM answer {answer}')
124
 
125
+ return (22050, synthesised_speech), translated_text, answer
126
 
127
  with gr.Blocks() as demo:
128
  options = gr.WaveformOptions(sample_rate=22050)
 
145
  with gr.Tab("Record Audio"):
146
  audioMic = gr.Audio(sources="microphone", waveform_options=options, type="filepath")
147
  with gr.Tab("Upload Audio"):
148
+ audioFile = gr.Audio(sources="upload", type="filepath")
149
 
150
  transcribeBtn = gr.Button("Submit", size='lg')
151
 
 
174
  with gr.Tab("Record Audio"):
175
  audioMic = gr.Audio(sources="microphone", waveform_options=options, type="filepath")
176
  with gr.Tab("Upload Audio"):
177
+ audioFile = gr.Audio(sources="upload", type="filepath")
178
 
179
  translateBtn = gr.Button("Submit", size='lg')
180
  with gr.Column(scale=1):
181
  textOutput = gr.Textbox(label="Transcribed text")
182
+ textAnswer = gr.Textbox(label="Assistant's Answer")
183
  audioOutput = gr.Audio(waveform_options=options, type="numpy")
184
 
185
+ translateBtn.click(fn=speech_to_speech, inputs=[audioMic, audioFile], outputs=[audioOutput, textOutput, textAnswer], api_name="report_generation")
186
 
187
  demo.launch()