ixxan committed on
Commit
647ec6f
1 Parent(s): 8e8c9ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -104,9 +104,9 @@ def flan_t5_complete_sentence(question, answer):
104
  def vqa_main(image, question):
105
  en_question, question_src_lang = google_translate(question, dest='en')
106
  dest_lang = find_dest_language(en_question, question_src_lang)
107
- vqa_input = remove_language_phrase(en_question)
108
- vqa_answer = vilt_vqa(image, vqa_input)
109
- llm_answer = flan_t5_complete_sentence(en_question, vqa_answer)
110
  final_answer, answer_src_lang = google_translate(llm_answer, dest=dest_lang)
111
  logger.info("Final Answer: " + final_answer)
112
  return final_answer
@@ -131,7 +131,7 @@ torch.hub.download_url_to_file('http://images.cocodataset.org/val2017/0000000397
131
  image = gr.Image(type="pil")
132
  question = gr.Textbox(label="Question")
133
  answer = gr.Textbox(label="Predicted answer")
134
- examples = [["apple.jpg", "In French, what is in my hand?"], ["cats.jpg", "What are the cats doing, in German?"], ["apple.jpg", "What color is this? Answer in Turkish."], ["cats.jpg", "How many cats are here?"]]
135
 
136
  demo = gr.Interface(fn=vqa_main,
137
  inputs=[image, question],
 
104
  def vqa_main(image, question):
105
  en_question, question_src_lang = google_translate(question, dest='en')
106
  dest_lang = find_dest_language(en_question, question_src_lang)
107
+ cleaned_question = remove_language_phrase(en_question)
108
+ vqa_answer = vilt_vqa(image, cleaned_question)
109
+ llm_answer = flan_t5_complete_sentence(cleaned_question, vqa_answer)
110
  final_answer, answer_src_lang = google_translate(llm_answer, dest=dest_lang)
111
  logger.info("Final Answer: " + final_answer)
112
  return final_answer
 
131
  image = gr.Image(type="pil")
132
  question = gr.Textbox(label="Question")
133
  answer = gr.Textbox(label="Predicted answer")
134
+ examples = [["apple.jpg", "In French, what is in my hand?"], ["cats.jpg", "What are the cats doing, in German?"], ["apple.jpg", "What color is this? Answer in Uyghur."], ["cats.jpg", "How many cats are here?"]]
135
 
136
  demo = gr.Interface(fn=vqa_main,
137
  inputs=[image, question],