MediPlusPlus committed on
Commit
d9846be
1 Parent(s): 9c6aa3b

ADDED TEXTBOX

Browse files
Files changed (1) hide show
  1. app.py +14 -12
app.py CHANGED
@@ -237,27 +237,29 @@ def predict_category(que, input_image):
237
 
238
  return preds[0]
239
 
240
-
241
- def combine(audio, input_image):
242
- que = transcribe_audio(audio)
243
- # que = "What is the animal here?"
 
244
 
245
  image = Image.fromarray(input_image).convert('RGB')
246
  category = predict_category(que, image)
247
-
248
  answer = predict_answer(0, que, image)
249
 
250
- # print(category)
251
-
252
  tts = gTTS(answer)
253
  tts.save('answer.mp3')
 
254
  return que, answer, 'answer.mp3'
255
 
256
-
257
-
258
- # Define the Gradio interface for recording audio and displaying the transcription
259
- model_interface = gr.Interface(fn=combine, inputs=[gr.Microphone(label="Ask your question"),gr.Image(label="Upload the image")], outputs=[gr.Text(label="Transcribed Question"), gr.Text(label="Answer"), gr.Audio(label="Audio Answer")])
260
- # image_upload_interface = gr.Interface(fn=upload_image, inputs=gr.Image(label="Upload the image"), outputs="text")
 
 
 
261
 
262
  # Launch the Gradio interface
263
  model_interface.launch(debug=True)
 
237
 
238
  return preds[0]
239
 
240
+ def combine(audio, input_image, text_question=""):
241
+ if audio:
242
+ que = transcribe_audio(audio)
243
+ else:
244
+ que = text_question
245
 
246
  image = Image.fromarray(input_image).convert('RGB')
247
  category = predict_category(que, image)
 
248
  answer = predict_answer(0, que, image)
249
 
 
 
250
  tts = gTTS(answer)
251
  tts.save('answer.mp3')
252
+
253
  return que, answer, 'answer.mp3'
254
 
255
+ # Define the Gradio interface for recording audio, text input, and image upload
256
+ model_interface = gr.Interface(fn=combine,
257
+ inputs=[gr.Microphone(label="Ask your question"),
258
+ gr.Image(label="Upload the image"),
259
+ gr.Textbox(label="Text Question")],
260
+ outputs=[gr.Text(label="Transcribed Question"),
261
+ gr.Text(label="Answer"),
262
+ gr.Audio(label="Audio Answer")])
263
 
264
  # Launch the Gradio interface
265
  model_interface.launch(debug=True)