virendravaishnav committed on
Commit
b966683
1 Parent(s): 6dadcd1

Updated with OCR model and Gradio integration

Browse files
Files changed (2) hide show
  1. app.py +7 -20
  2. requirements.txt +6 -5
app.py CHANGED
@@ -1,33 +1,20 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer
3
- from huggingface_hub import snapshot_download
4
- import sys
5
- import os
6
 
7
- # Download the model snapshot
8
  repo_id = "OpenGVLab/InternVL2-1B"
9
- model_dir = snapshot_download(repo_id)
10
 
11
- # Add the model directory to the Python path for dynamic imports
12
- sys.path.append(model_dir)
13
-
14
- # Import the custom configuration and model classes
15
- from configuration_internvl_chat import InternVLChatConfig
16
- from modeling_internvl_chat import InternVLForVision2Seq
17
-
18
- # Load the tokenizer and model
19
  tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
20
- config = InternVLChatConfig.from_pretrained(repo_id, trust_remote_code=True)
21
- model = InternVLForVision2Seq.from_pretrained(repo_id, config=config, trust_remote_code=True)
 
22
 
23
- # Function to process and describe the image
24
  def analyze_image(image):
25
  img = image.convert("RGB")
26
- inputs = tokenizer("describe this image", return_tensors="pt")
27
  outputs = model.generate(**inputs)
28
  return tokenizer.decode(outputs[0], skip_special_tokens=True)
29
 
30
- # Gradio interface for image input
31
  demo = gr.Interface(
32
  fn=analyze_image,
33
  inputs=gr.Image(type="pil"),
@@ -37,4 +24,4 @@ demo = gr.Interface(
37
  )
38
 
39
  if __name__ == "__main__":
40
- demo.launch(share=True)
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoProcessor, AutoConfig, AutoModelForSeq2SeqLM
 
 
 
3
 
 
4
  repo_id = "OpenGVLab/InternVL2-1B"
 
5
 
6
+ # Load the tokenizer, processor, and model directly from the Hugging Face Hub
 
 
 
 
 
 
 
7
  tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
8
+ processor = AutoProcessor.from_pretrained(repo_id, trust_remote_code=True)
9
+ config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
10
+ model = AutoModelForSeq2SeqLM.from_pretrained(repo_id, config=config, trust_remote_code=True)
11
 
 
12
def analyze_image(image):
    """Generate a text description for *image* with the loaded model.

    Parameters
    ----------
    image : PIL.Image.Image
        Input image supplied by the Gradio image widget.

    Returns
    -------
    str
        Decoded model output with special tokens stripped.
    """
    rgb = image.convert("RGB")
    # NOTE(review): assumes the processor accepts images+text and returns
    # generate()-ready tensors — confirm against the checkpoint's processor.
    model_inputs = processor(images=rgb, text="describe this image", return_tensors="pt")
    generated = model.generate(**model_inputs)
    return tokenizer.decode(generated[0], skip_special_tokens=True)
17
 
 
18
  demo = gr.Interface(
19
  fn=analyze_image,
20
  inputs=gr.Image(type="pil"),
 
24
  )
25
 
26
if __name__ == "__main__":
    # Start the Gradio server locally (no public share link).
    demo.launch()
requirements.txt CHANGED
@@ -1,5 +1,6 @@
1
- transformers==4.31.0
2
- huggingface_hub==0.16.4
3
- gradio==3.28.3
4
- torch>=1.9
5
- Pillow==9.4.0
 
 
1
+ transformers>=4.31.0
2
+ gradio>=3.35.2
3
+ torch>=1.9.0
4
+ huggingface_hub>=0.14.1
5
+ pillow
6
+ accelerate