aavetis committed on
Commit
109b906
1 Parent(s): 26abb52
Files changed (2) hide show
  1. app.py +15 -5
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import gradio as gr
2
  from uform import gen_model
3
  from PIL import Image
 
4
 
5
  # Load the model and processor
6
  model = gen_model.VLMForCausalLM.from_pretrained("unum-cloud/uform-gen")
@@ -8,20 +9,29 @@ processor = gen_model.VLMProcessor.from_pretrained("unum-cloud/uform-gen")
8
 
9
  def generate_caption(image, prompt):
10
  # Process the image and the prompt
11
- inputs = processor(text=prompt, images=image, return_tensors="pt", padding=True)
12
 
13
  # Generate the output
14
- outputs = model.generate(**inputs)
15
- caption = processor.decode(outputs[0], skip_special_tokens=True)
 
 
 
 
 
 
 
16
 
17
- return caption
 
 
 
18
 
19
  # Define the Gradio interface
20
  iface = gr.Interface(
21
  fn=generate_caption,
22
  inputs=[gr.Image(type="pil", label="Upload Image"), gr.Textbox(label="Prompt")],
23
  outputs=gr.Textbox(label="Generated Caption"),
24
- examples=[["jungle-glass.png", ""]]
25
  )
26
 
27
  # Launch the interface
 
1
  import gradio as gr
2
  from uform import gen_model
3
  from PIL import Image
4
+ import torch
5
 
6
  # Load the model and processor
7
  model = gen_model.VLMForCausalLM.from_pretrained("unum-cloud/uform-gen")
 
9
 
10
  def generate_caption(image, prompt):
11
  # Process the image and the prompt
12
+ inputs = processor(texts=[prompt], images=[image], return_tensors="pt")
13
 
14
  # Generate the output
15
+ with torch.inference_mode():
16
+ output = model.generate(
17
+ **inputs,
18
+ do_sample=False,
19
+ use_cache=True,
20
+ max_new_tokens=128,
21
+ eos_token_id=32001,
22
+ pad_token_id=processor.tokenizer.pad_token_id
23
+ )
24
 
25
+ prompt_len = inputs["input_ids"].shape[1]
26
+ decoded_text = processor.batch_decode(output[:, prompt_len:])[0]
27
+
28
+ return decoded_text
29
 
30
  # Define the Gradio interface
31
  iface = gr.Interface(
32
  fn=generate_caption,
33
  inputs=[gr.Image(type="pil", label="Upload Image"), gr.Textbox(label="Prompt")],
34
  outputs=gr.Textbox(label="Generated Caption"),
 
35
  )
36
 
37
  # Launch the interface
requirements.txt CHANGED
@@ -1,2 +1,3 @@
1
  gradio
2
  uform
 
 
1
  gradio
2
  uform
3
+ torch