Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -104,9 +104,9 @@ def flan_t5_complete_sentence(question, answer):
|
|
104 |
def vqa_main(image, question):
|
105 |
en_question, question_src_lang = google_translate(question, dest='en')
|
106 |
dest_lang = find_dest_language(en_question, question_src_lang)
|
107 |
-
|
108 |
-
vqa_answer = vilt_vqa(image,
|
109 |
-
llm_answer = flan_t5_complete_sentence(
|
110 |
final_answer, answer_src_lang = google_translate(llm_answer, dest=dest_lang)
|
111 |
logger.info("Final Answer: " + final_answer)
|
112 |
return final_answer
|
@@ -131,7 +131,7 @@ torch.hub.download_url_to_file('http://images.cocodataset.org/val2017/0000000397
|
|
131 |
image = gr.Image(type="pil")
|
132 |
question = gr.Textbox(label="Question")
|
133 |
answer = gr.Textbox(label="Predicted answer")
|
134 |
-
examples = [["apple.jpg", "In French, what is in my hand?"], ["cats.jpg", "What are the cats doing, in German?"], ["apple.jpg", "What color is this? Answer in
|
135 |
|
136 |
demo = gr.Interface(fn=vqa_main,
|
137 |
inputs=[image, question],
|
|
|
104 |
def vqa_main(image, question):
|
105 |
en_question, question_src_lang = google_translate(question, dest='en')
|
106 |
dest_lang = find_dest_language(en_question, question_src_lang)
|
107 |
+
cleaned_question = remove_language_phrase(en_question)
|
108 |
+
vqa_answer = vilt_vqa(image, cleaned_question)
|
109 |
+
llm_answer = flan_t5_complete_sentence(cleaned_question, vqa_answer)
|
110 |
final_answer, answer_src_lang = google_translate(llm_answer, dest=dest_lang)
|
111 |
logger.info("Final Answer: " + final_answer)
|
112 |
return final_answer
|
|
|
131 |
image = gr.Image(type="pil")
|
132 |
question = gr.Textbox(label="Question")
|
133 |
answer = gr.Textbox(label="Predicted answer")
|
134 |
+
examples = [["apple.jpg", "In French, what is in my hand?"], ["cats.jpg", "What are the cats doing, in German?"], ["apple.jpg", "What color is this? Answer in Uyghur."], ["cats.jpg", "How many cats are here?"]]
|
135 |
|
136 |
demo = gr.Interface(fn=vqa_main,
|
137 |
inputs=[image, question],
|