cosmo3769 committed
Commit
0433fd6
1 Parent(s): a6a967c

Create app.py

Files changed (1): app.py +41 -0
app.py ADDED
@@ -0,0 +1,41 @@
+ import gradio as gr
+ from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
+ from PIL import Image
+ import torch
+
+ # Load model and processor
+ mix_model_id = "google/paligemma-3b-mix-224"
+ mix_model = PaliGemmaForConditionalGeneration.from_pretrained(mix_model_id)
+ mix_processor = AutoProcessor.from_pretrained(mix_model_id)
+
+ # Define inference function
+ def process_image(image, prompt):
+     # Process the image and prompt using the processor
+     inputs = mix_processor(image.convert("RGB"), prompt, return_tensors="pt")
+
+     try:
+         # Generate output from the model
+         output = mix_model.generate(**inputs, max_new_tokens=20)
+
+         # Decode and return the output
+         decoded_output = mix_processor.decode(output[0], skip_special_tokens=True)
+
+         # Return the answer (exclude the prompt part from the output)
+         return decoded_output[len(prompt):]
+     except IndexError as e:
+         print(f"IndexError: {e}")
+         return "An error occurred during processing."
+
+ # Define the Gradio interface
+ inputs = [
+     gr.Image(type="pil"),
+     gr.Textbox(label="Prompt", placeholder="Enter your question")
+ ]
+ outputs = gr.Textbox(label="Answer")
+
+ # Create the Gradio app
+ demo = gr.Interface(fn=process_image, inputs=inputs, outputs=outputs, title="Image Captioning with Mix PaliGemma Model",
+                     description="Upload an image and get captions based on your prompt.")
+
+ # Launch the app
+ demo.launch()
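
For a quick check outside the Gradio UI, the process_image function defined above can also be called directly. The snippet below is a minimal sketch, assuming a local image at the hypothetical path sample.jpg and a prompt format the PaliGemma mix checkpoints are commonly used with (a "caption en" prefix or a short question); run it with demo.launch() commented out or in a separate session where app.py has been imported.

from PIL import Image

test_image = Image.open("sample.jpg")  # hypothetical example image
print(process_image(test_image, "caption en"))             # short English caption
print(process_image(test_image, "What is in the image?"))  # free-form question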