Build

Paused

ManishThota committed on Feb 12, 2024

Commit

702cb53

verified ·

1 Parent(s): 89e1517

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -39,7 +39,7 @@ tokenizer = AutoTokenizer.from_pretrained("ManishThota/Sparrow", trust_remote_co
 #     return generated_text
-def predict_answer(image, question):
     #Set inputs
     text = f"A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <image>\n{question}? ASSISTANT:"
     image = Image.open(image)
@@ -50,20 +50,22 @@ def predict_answer(image, question):
     #Generate the answer
     output_ids = model.generate(
         input_ids,
-        max_new_tokens=25,
         images=image_tensor,
         use_cache=True)[0]
     return tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
-def gradio_predict(image, question):
-    answer = predict_answer(image, question)
     return answer
 # Define the Gradio interface
 iface = gr.Interface(
     fn=gradio_predict,
-    inputs=[gr.Image(type="pil", label="Upload or Drag an Image"), gr.Textbox(label="Question", placeholder="e.g. What are the colors of the bus in the image?", scale=4)],
     outputs=gr.TextArea(label="Answer"),
     title="Sparrow-based Visual Question Answering",
     description="An interactive chat model that can answer questions about images.",

 #     return generated_text
+def predict_answer(image, question, max_tokens):
     #Set inputs
     text = f"A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <image>\n{question}? ASSISTANT:"
     image = Image.open(image)
     #Generate the answer
     output_ids = model.generate(
         input_ids,
+        max_new_tokens=max_tokens,
         images=image_tensor,
         use_cache=True)[0]
     return tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
+def gradio_predict(image, question, max_tokens=25):
+    answer = predict_answer(image, question, max_tokens)
     return answer
 # Define the Gradio interface
 iface = gr.Interface(
     fn=gradio_predict,
+    inputs=[gr.Image(type="pil", label="Upload or Drag an Image"),
+            gr.Textbox(label="Question", placeholder="e.g. What are the colors of the bus in the image?", scale=4),
+            gr.Slider(minimum=1, maximum=100, default=25, label="Max Number of Tokens")],
     outputs=gr.TextArea(label="Answer"),
     title="Sparrow-based Visual Question Answering",
     description="An interactive chat model that can answer questions about images.",