diabolic6045 committed
Commit beede2c
1 Parent(s): e7bca13

Update app.py

Files changed (1): app.py (+16 -18)
app.py CHANGED
@@ -8,7 +8,6 @@ import os
 from huggingface_hub import login
 login(os.environ["HF_KEY"])
 
-# Load the model and tokenizer
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model = AutoModelForVision2Seq.from_pretrained("stabilityai/japanese-stable-vlm", trust_remote_code=True, device_map='auto')
 processor = AutoImageProcessor.from_pretrained("stabilityai/japanese-stable-vlm", device_map='auto')
@@ -42,7 +41,6 @@ def build_prompt(task="caption", input=None, sep="\n\n### "):
     return p
 
 # Define the function to generate text from the image and prompt
-@spaces.GPU(duration=120)
 def generate_text(image, task, input_text=None):
     prompt = build_prompt(task=task, input=input_text)
     inputs = processor(images=image, return_tensors="pt")
@@ -60,21 +58,21 @@ def generate_text(image, task, input_text=None):
     return generated_text
 
 # Define the Gradio interface
-with gr.Blocks() as demo:
-    chatbot = gr.Chatbot([], elem_id="chatbot", show_copy_button=True)
-    with gr.Group():
-        with gr.Row():
-            image_input = gr.Image(label="Upload an image")
-            task_input = gr.Radio(choices=["caption", "tag", "vqa"], value="caption", label="Select a task")
-            text_input = gr.Textbox(label="Enter text (for tag or vqa tasks)")
-    submit_btn = gr.Button("Submit")
-    inputs = [image_input, task_input, text_input]
-    outputs = chatbot
-    submit_btn.click(generate_text, inputs, outputs, api_name="generate_text")
+image_input = gr.Image(label="Upload an image")
+task_input = gr.Radio(choices=["caption", "tag", "vqa"], value="caption", label="Select a task")
+text_input = gr.Textbox(label="Enter text (for tag or vqa tasks)")
 
-    # Event listeners
-    chatbot.change(lambda x: print(f"Chatbot changed: {x}"), chatbot, chatbot)
-    chatbot.select(lambda x: print(f"Chatbot selected: {x.value}, {x.selected}"), None, chatbot)
-    chatbot.like(lambda x: print(f"Liked/Disliked: {x.index}, {x.value}, {x.liked}"), None, chatbot)
+output = gr.Textbox(label="Generated text")
 
-demo.launch()
+interface = gr.Interface(
+    fn=generate_text,
+    inputs=[image_input, task_input, text_input],
+    outputs=output,
+    examples=[
+        ["examples/example_image.jpg", "caption", None],
+        ["examples/example_image.jpg", "tag", "河津桜、青空"],
+        ["examples/example_image.jpg", "vqa", "OCRはできますか?"],
+    ],
+)
+
+interface.launch()
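
For reference, below is a minimal runnable sketch of how the gr.Interface wiring added in this commit fits together. It only assumes that generate_text returns a string: the model-loading and generation code from the unchanged context is replaced by a stub, and the examples= list is left out because it points at files in the Space's examples/ directory.

# Minimal sketch of the Gradio wiring added in this commit (not the full app.py).
# The real generate_text runs stabilityai/japanese-stable-vlm via the processor
# and model loaded earlier; here it is stubbed so the UI can run standalone.
import gradio as gr

def generate_text(image, task, input_text=None):
    # Placeholder for the model call; the actual app builds a prompt with
    # build_prompt() and generates text with the VLM.
    return f"[{task}] placeholder output"

image_input = gr.Image(label="Upload an image")
task_input = gr.Radio(choices=["caption", "tag", "vqa"], value="caption", label="Select a task")
text_input = gr.Textbox(label="Enter text (for tag or vqa tasks)")
output = gr.Textbox(label="Generated text")

interface = gr.Interface(
    fn=generate_text,
    inputs=[image_input, task_input, text_input],
    outputs=output,
)

if __name__ == "__main__":
    interface.launch()

gr.Interface creates the submit and clear controls and the page layout on its own, so no explicit gr.Button or click wiring is required.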