diabolic6045 committed
Commit beede2c
1 Parent(s): e7bca13

Update app.py

Files changed (1): app.py (+16 -18)
app.py CHANGED
@@ -8,7 +8,6 @@ import os
 from huggingface_hub import login
 login(os.environ["HF_KEY"])
 
-# Load the model and tokenizer
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model = AutoModelForVision2Seq.from_pretrained("stabilityai/japanese-stable-vlm", trust_remote_code=True, device_map='auto')
 processor = AutoImageProcessor.from_pretrained("stabilityai/japanese-stable-vlm", device_map='auto')
@@ -42,7 +41,6 @@ def build_prompt(task="caption", input=None, sep="\n\n### "):
     return p
 
 # Define the function to generate text from the image and prompt
-@spaces.GPU(duration=120)
 def generate_text(image, task, input_text=None):
     prompt = build_prompt(task=task, input=input_text)
     inputs = processor(images=image, return_tensors="pt")
@@ -60,21 +58,21 @@ def generate_text(image, task, input_text=None):
     return generated_text
 
 # Define the Gradio interface
-with gr.Blocks() as demo:
-    chatbot = gr.Chatbot([], elem_id="chatbot", show_copy_button=True)
-    with gr.Group():
-        with gr.Row():
-            image_input = gr.Image(label="Upload an image")
-            task_input = gr.Radio(choices=["caption", "tag", "vqa"], value="caption", label="Select a task")
-            text_input = gr.Textbox(label="Enter text (for tag or vqa tasks)")
-    submit_btn = gr.Button("Submit")
-    inputs = [image_input, task_input, text_input]
-    outputs = chatbot
-    submit_btn.click(generate_text, inputs, outputs, api_name="generate_text")
+image_input = gr.Image(label="Upload an image")
+task_input = gr.Radio(choices=["caption", "tag", "vqa"], value="caption", label="Select a task")
+text_input = gr.Textbox(label="Enter text (for tag or vqa tasks)")
 
-    # Event listeners
-    chatbot.change(lambda x: print(f"Chatbot changed: {x}"), chatbot, chatbot)
-    chatbot.select(lambda x: print(f"Chatbot selected: {x.value}, {x.selected}"), None, chatbot)
-    chatbot.like(lambda x: print(f"Liked/Disliked: {x.index}, {x.value}, {x.liked}"), None, chatbot)
+output = gr.Textbox(label="Generated text")
 
-demo.launch()
+interface = gr.Interface(
+    fn=generate_text,
+    inputs=[image_input, task_input, text_input],
+    outputs=output,
+    examples=[
+        ["examples/example_image.jpg", "caption", None],
+        ["examples/example_image.jpg", "tag", "河津桜、青空"],
+        ["examples/example_image.jpg", "vqa", "OCRはできますか?"],
+    ],
+)
+
+interface.launch()
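
For reference, below is a minimal runnable sketch of how the gr.Interface wiring added in this commit fits together. It only assumes that generate_text returns a string: the model-loading and generation code from the unchanged context is replaced by a stub, and the examples= list is left out because it points at files in the Space's examples/ directory.

# Minimal sketch of the Gradio wiring added in this commit (not the full app.py).
# The real generate_text runs stabilityai/japanese-stable-vlm via the processor
# and model loaded earlier; here it is stubbed so the UI can run standalone.
import gradio as gr

def generate_text(image, task, input_text=None):
    # Placeholder for the model call; the actual app builds a prompt with
    # build_prompt() and generates text with the VLM.
    return f"[{task}] placeholder output"

image_input = gr.Image(label="Upload an image")
task_input = gr.Radio(choices=["caption", "tag", "vqa"], value="caption", label="Select a task")
text_input = gr.Textbox(label="Enter text (for tag or vqa tasks)")
output = gr.Textbox(label="Generated text")

interface = gr.Interface(
    fn=generate_text,
    inputs=[image_input, task_input, text_input],
    outputs=output,
)

if __name__ == "__main__":
    interface.launch()

gr.Interface creates the submit and clear controls and the page layout on its own, so no explicit gr.Button or click wiring is required.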