Spaces:

ixxan
/

cross-lingual-vqa

Running

ixxan committed on Sep 1

Commit

647ec6f

•

1 Parent(s): 8e8c9ba

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -104,9 +104,9 @@ def flan_t5_complete_sentence(question, answer):
 def vqa_main(image, question):
     en_question, question_src_lang = google_translate(question, dest='en')
     dest_lang = find_dest_language(en_question, question_src_lang)
-    vqa_input = remove_language_phrase(en_question)
-    vqa_answer = vilt_vqa(image, vqa_input)
-    llm_answer = flan_t5_complete_sentence(en_question, vqa_answer)
     final_answer, answer_src_lang = google_translate(llm_answer, dest=dest_lang)
     logger.info("Final Answer: " + final_answer)
     return final_answer
@@ -131,7 +131,7 @@ torch.hub.download_url_to_file('http://images.cocodataset.org/val2017/0000000397
 image = gr.Image(type="pil")
 question = gr.Textbox(label="Question")
 answer = gr.Textbox(label="Predicted answer")
-examples = [["apple.jpg", "In French, what is in my hand?"], ["cats.jpg", "What are the cats doing, in German?"], ["apple.jpg", "What color is this? Answer in Turkish."], ["cats.jpg", "How many cats are here?"]]
 demo = gr.Interface(fn=vqa_main,
                          inputs=[image, question],

 def vqa_main(image, question):
     en_question, question_src_lang = google_translate(question, dest='en')
     dest_lang = find_dest_language(en_question, question_src_lang)
+    cleaned_question = remove_language_phrase(en_question)
+    vqa_answer = vilt_vqa(image, cleaned_question)
+    llm_answer = flan_t5_complete_sentence(cleaned_question, vqa_answer)
     final_answer, answer_src_lang = google_translate(llm_answer, dest=dest_lang)
     logger.info("Final Answer: " + final_answer)
     return final_answer
 image = gr.Image(type="pil")
 question = gr.Textbox(label="Question")
 answer = gr.Textbox(label="Predicted answer")
+examples = [["apple.jpg", "In French, what is in my hand?"], ["cats.jpg", "What are the cats doing, in German?"], ["apple.jpg", "What color is this? Answer in Uyghur."], ["cats.jpg", "How many cats are here?"]]
 demo = gr.Interface(fn=vqa_main,
                          inputs=[image, question],