virendravaishnav committed
Commit b966683
Parent(s): 6dadcd1

Updated with OCR model and Gradio integration

Files changed:
- app.py (+7 -20)
- requirements.txt (+6 -5)
app.py
CHANGED
@@ -1,33 +1,20 @@
 import gradio as gr
-from transformers import AutoTokenizer
-from huggingface_hub import snapshot_download
-import sys
-import os
+from transformers import AutoTokenizer, AutoProcessor, AutoConfig, AutoModelForSeq2SeqLM
 
-# Download the model snapshot
 repo_id = "OpenGVLab/InternVL2-1B"
-model_dir = snapshot_download(repo_id)
 
-#
-sys.path.append(model_dir)
-
-# Import the custom configuration and model classes
-from configuration_internvl_chat import InternVLChatConfig
-from modeling_internvl_chat import InternVLForVision2Seq
-
-# Load the tokenizer and model
+# Load the tokenizer, processor, and model directly from the Hugging Face Hub
 tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
-
-
+processor = AutoProcessor.from_pretrained(repo_id, trust_remote_code=True)
+config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
+model = AutoModelForSeq2SeqLM.from_pretrained(repo_id, config=config, trust_remote_code=True)
 
-# Function to process and describe the image
 def analyze_image(image):
     img = image.convert("RGB")
-    inputs =
+    inputs = processor(images=img, text="describe this image", return_tensors="pt")
     outputs = model.generate(**inputs)
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-# Gradio interface for image input
 demo = gr.Interface(
     fn=analyze_image,
     inputs=gr.Image(type="pil"),
@@ -37,4 +24,4 @@ demo = gr.Interface(
 )
 
 if __name__ == "__main__":
-    demo.launch(
+    demo.launch()
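Review note on the new loading path: InternVL2-1B ships a custom architecture behind `trust_remote_code`, and its model card loads the checkpoint with `AutoModel` and drives inference through the repo-provided `chat()` method; the `AutoProcessor`/`AutoModelForSeq2SeqLM` mappings used above may not resolve for this repo. A minimal sketch of the documented route, assuming the Hub repo still exposes `model.chat()`, with a simplified single-tile preprocessing in place of the model card's dynamic tiling (the `describe` helper is illustrative, not part of this commit):

import torch
import torchvision.transforms as T
from transformers import AutoModel, AutoTokenizer

repo_id = "OpenGVLab/InternVL2-1B"

# AutoModel resolves to the repo's custom InternVL chat class via remote code.
model = AutoModel.from_pretrained(
    repo_id, torch_dtype=torch.bfloat16, trust_remote_code=True
).eval()
tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)

# One 448x448 tile with ImageNet normalization; the model card's dynamic
# tiling is skipped here for brevity.
transform = T.Compose([
    T.Resize((448, 448), interpolation=T.InterpolationMode.BICUBIC),
    T.ToTensor(),
    T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

# Illustrative helper (not part of this commit).
def describe(image):
    # pixel_values: (num_tiles, 3, 448, 448), cast to the model's dtype.
    pixel_values = transform(image.convert("RGB")).unsqueeze(0).to(torch.bfloat16)
    # chat() is defined by the repo's remote code; signature as in the model card.
    return model.chat(tokenizer, pixel_values, "<image>\ndescribe this image",
                      generation_config=dict(max_new_tokens=256))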
requirements.txt
CHANGED
@@ -1,5 +1,6 @@
-transformers
-
-
-
-
+transformers>=4.31.0
+gradio>=3.35.2
+torch>=1.9.0
+huggingface_hub>=0.14.1
+pillow
+accelerate
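On a Space these requirements are installed automatically at build time; locally the equivalent is `pip install -r requirements.txt`. A small sanity check of the resolved versions, handy when debugging a build (a convenience sketch, not part of the commit):

import gradio
import huggingface_hub
import torch
import transformers

# Print the version that actually resolved for each version-floored package.
for mod in (transformers, gradio, torch, huggingface_hub):
    print(mod.__name__, mod.__version__)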