aavetis committed on
Commit
cdcc865
•
1 Parent(s): 1ab2c0c
Files changed (5) hide show
  1. .gitignore +1 -0
  2. README copy.md +12 -0
  3. app.py +36 -0
  4. jungle-glass.png +0 -0
  5. requirements.txt +2 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ venv
README copy.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Ugen Image Captioning
3
+ emoji: 📊
4
+ colorFrom: green
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 4.12.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from uform import gen_model
3
+ from PIL import Image
4
+ import requests
5
+
6
# Both the language model and its processor are loaded from the same
# checkpoint, so the identifier is spelled once and shared.
_CHECKPOINT = "unum-cloud/uform-gen"

processor = gen_model.VLMProcessor.from_pretrained(_CHECKPOINT)
model = gen_model.VLMForCausalLM.from_pretrained(_CHECKPOINT)
9
+
10
+
11
def generate_caption(image, prompt):
    """Generate text for ``image`` in response to ``prompt``.

    Args:
        image: PIL image handed over by the Gradio image input, or ``None``
            when the user has not provided one.
        prompt: Instruction or question passed to the model alongside the
            image.

    Returns:
        The decoded model output with the echoed prompt tokens and special
        tokens removed.

    Raises:
        gr.Error: If no image was supplied, so the UI shows a readable
            message instead of a stack trace.
    """
    if image is None:
        raise gr.Error("Please upload an image before requesting a caption.")

    # NOTE(review): the uform-gen usage example calls the processor with
    # ``texts=`` and list-wrapped (batched) inputs; the previous ``text=`` /
    # ``padding=`` keywords are not part of that example - confirm against
    # the installed uform version.
    inputs = processor(texts=[prompt], images=[image], return_tensors="pt")

    # Deterministic, bounded generation. NOTE(review): the eos token id is
    # taken from the uform-gen usage example - confirm it matches the
    # checkpoint's tokenizer.
    outputs = model.generate(
        **inputs,
        do_sample=False,
        use_cache=True,
        max_new_tokens=128,
        eos_token_id=32001,
        pad_token_id=processor.tokenizer.pad_token_id,
    )

    # The generated sequence starts with the prompt tokens; drop them so the
    # caller only sees the newly generated text.
    prompt_len = inputs["input_ids"].shape[1]
    caption = processor.decode(outputs[0][prompt_len:], skip_special_tokens=True)

    return caption
20
+
21
+
22
# Load the demo image that pre-populates the image input.
demo_image = Image.open("jungle-glass.png")

# Define the Gradio interface.
# Gradio 4.x (the README pins sdk_version 4.12.0) no longer provides the
# ``gr.inputs`` / ``gr.outputs`` namespaces: components are used directly
# from the top-level package, and a component's initial content is passed
# as ``value`` rather than ``default``.
iface = gr.Interface(
    fn=generate_caption,
    inputs=[
        gr.Image(type="pil", label="Upload Image", value=demo_image),
        gr.Textbox(label="Prompt"),
    ],
    outputs=gr.Textbox(label="Generated Caption"),
)

# Launch the interface
iface.launch()
jungle-glass.png ADDED
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio
2
+ uform