Spaces:

arad1367
/

Visual-QA-MiniCPM-Llama3-V-2_5

Running on Zero

arad1367 committed on Jun 4

Commit

9673377

•

1 Parent(s): 461f2d9

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

+import gradio as gr
+import torch
+from PIL import Image
+from transformers import AutoModel, AutoTokenizer
+# Load the model and tokenizer
+model = AutoModel.from_pretrained('openbmb/MiniCPM-Llama3-V-2_5', trust_remote_code=True, torch_dtype=torch.float16)
+model = model.to(device='cuda')
+tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-Llama3-V-2_5', trust_remote_code=True)
+model.eval()
+# Define a function to generate a response
+def generate_response(image, question):
+    msgs = [{'role': 'user', 'content': question}]
+    res = model.chat(
+        image=image,
+        msgs=msgs,
+        tokenizer=tokenizer,
+        sampling=True,
+        temperature=0.7,
+        stream=True
+    )
+    generated_text = ""
+    for new_text in res:
+        generated_text += new_text
+    return generated_text
+# Create a Gradio interface
+iface = gr.Interface(
+    fn=generate_response,
+    inputs=[gr.Image(type="pil"), "text"],
+    outputs="text",
+    title="Visual Question Answering",
+    description="Input an image and a question related to the image to receive a response.",
+)
+# Launch the app
+iface.launch(debug=True)