virendravaishnav committed on
Commit
b966683
1 Parent(s): 6dadcd1

Updated with OCR model and Gradio integration

Browse files
Files changed (2) hide show
  1. app.py +7 -20
  2. requirements.txt +6 -5
app.py CHANGED
@@ -1,33 +1,20 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer
3
- from huggingface_hub import snapshot_download
4
- import sys
5
- import os
6
 
7
- # Download the model snapshot
8
  repo_id = "OpenGVLab/InternVL2-1B"
9
- model_dir = snapshot_download(repo_id)
10
 
11
- # Add the model directory to the Python path for dynamic imports
12
- sys.path.append(model_dir)
13
-
14
- # Import the custom configuration and model classes
15
- from configuration_internvl_chat import InternVLChatConfig
16
- from modeling_internvl_chat import InternVLForVision2Seq
17
-
18
- # Load the tokenizer and model
19
  tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
20
- config = InternVLChatConfig.from_pretrained(repo_id, trust_remote_code=True)
21
- model = InternVLForVision2Seq.from_pretrained(repo_id, config=config, trust_remote_code=True)
 
22
 
23
- # Function to process and describe the image
24
  def analyze_image(image):
25
  img = image.convert("RGB")
26
- inputs = tokenizer("describe this image", return_tensors="pt")
27
  outputs = model.generate(**inputs)
28
  return tokenizer.decode(outputs[0], skip_special_tokens=True)
29
 
30
- # Gradio interface for image input
31
  demo = gr.Interface(
32
  fn=analyze_image,
33
  inputs=gr.Image(type="pil"),
@@ -37,4 +24,4 @@ demo = gr.Interface(
37
  )
38
 
39
  if __name__ == "__main__":
40
- demo.launch(share=True)
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoProcessor, AutoConfig, AutoModelForSeq2SeqLM
 
 
 
3
 
 
4
  repo_id = "OpenGVLab/InternVL2-1B"
 
5
 
6
+ # Load the tokenizer, processor, and model directly from the Hugging Face Hub
 
 
 
 
 
 
 
7
  tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
8
+ processor = AutoProcessor.from_pretrained(repo_id, trust_remote_code=True)
9
+ config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
10
+ model = AutoModelForSeq2SeqLM.from_pretrained(repo_id, config=config, trust_remote_code=True)
11
 
 
12
def analyze_image(image):
    """Generate a text description for *image* with the loaded model.

    Parameters
    ----------
    image : PIL.Image.Image
        Input image supplied by the Gradio image widget.

    Returns
    -------
    str
        Decoded model output with special tokens stripped.
    """
    rgb = image.convert("RGB")
    # NOTE(review): assumes the processor accepts images+text and returns
    # generate()-ready tensors — confirm against the checkpoint's processor.
    model_inputs = processor(images=rgb, text="describe this image", return_tensors="pt")
    generated = model.generate(**model_inputs)
    return tokenizer.decode(generated[0], skip_special_tokens=True)
17
 
 
18
  demo = gr.Interface(
19
  fn=analyze_image,
20
  inputs=gr.Image(type="pil"),
 
24
  )
25
 
26
if __name__ == "__main__":
    # Start the Gradio server locally (no public share link).
    demo.launch()
requirements.txt CHANGED
@@ -1,5 +1,6 @@
1
- transformers==4.31.0
2
- huggingface_hub==0.16.4
3
- gradio==3.28.3
4
- torch>=1.9
5
- Pillow==9.4.0
 
 
1
+ transformers>=4.31.0
2
+ gradio>=3.35.2
3
+ torch>=1.9.0
4
+ huggingface_hub>=0.14.1
5
+ pillow
6
+ accelerate