jakep-allenai committed
Commit 7ed6733 · verified · 1 Parent(s): 24221ab

Update README.md

Files changed (1):
  1. README.md +5 -5
README.md CHANGED
@@ -93,19 +93,19 @@ import urllib.request
 
 from io import BytesIO
 from PIL import Image
-from transformers import AutoProcessor, Qwen2_5VLForConditionalGeneration
+from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
 
 from olmocr.data.renderpdf import render_pdf_to_base64png
 from olmocr.prompts import build_no_anchoring_v4_yaml_prompt
 
 # Initialize the model
-model = Qwen2_5VLForConditionalGeneration.from_pretrained("allenai/olmOCR-7B-1025", torch_dtype=torch.bfloat16).eval()
+model = Qwen2_5_VLForConditionalGeneration.from_pretrained("allenai/olmOCR-7B-1025", torch_dtype=torch.bfloat16).eval()
 processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 
 # Grab a sample PDF
-urllib.request.urlretrieve("https://molmo.allenai.org/paper.pdf", "./paper.pdf")
+urllib.request.urlretrieve("https://olmocr.allenai.org/papers/olmocr.pdf", "./paper.pdf")
 
 # Render page 1 to an image
 image_base64 = render_pdf_to_base64png("./paper.pdf", 1, target_longest_image_dim=1288)
@@ -138,7 +138,7 @@ inputs = {key: value.to(device) for (key, value) in inputs.items()}
 # Generate the output
 output = model.generate(
     **inputs,
-    temperature=0.8,
+    temperature=0.1,
     max_new_tokens=50,
     num_return_sequences=1,
     do_sample=True,
@@ -152,7 +152,7 @@ text_output = processor.tokenizer.batch_decode(
 )
 
 print(text_output)
-# ['{"primary_language":"en","is_rotation_valid":true,"rotation_correction":0,"is_table":false,"is_diagram":false,"natural_text":"Molmo and PixMo:\\nOpen Weights and Open Data\\nfor State-of-the']
+# ['---\nprimary_language: en\nis_rotation_valid: True\nrotation_correction: 0\nis_table: False\nis_diagram: False\n---\nolmOCR: Unlocking Trillions of Tokens in PDFs with Vision Language Models\n\nJake Poz']
 ```
 
 ## License and use
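
A note for downstream consumers of this change: the expected output moves from a single JSON object to YAML front matter followed by the page's natural text. Below is a minimal sketch of how the new format could be split apart, assuming PyYAML and the `---` delimiters shown in the diff; the `split_front_matter` helper is hypothetical and not part of olmocr.

```python
import yaml  # PyYAML; an assumed dependency for this sketch only

def split_front_matter(raw: str):
    """Split '---\\n<yaml>\\n---\\n<text>' into (metadata dict, natural text)."""
    # maxsplit=2 keeps any later '---' sequences inside the page text intact
    _, front_matter, natural_text = raw.split("---", 2)
    return yaml.safe_load(front_matter), natural_text.lstrip("\n")

# Sample taken from the expected output in the diff above (truncated)
raw = ("---\nprimary_language: en\nis_rotation_valid: True\nrotation_correction: 0\n"
       "is_table: False\nis_diagram: False\n---\nolmOCR: Unlocking Trillions of Tokens "
       "in PDFs with Vision Language Models\n\nJake Poz")

metadata, text = split_front_matter(raw)
print(metadata["is_rotation_valid"])  # True (YAML parses 'True' as a bool)
print(text.splitlines()[0])           # first line of the recovered page text
```

With the temperature lowered to 0.1, sampling is close to greedy, so repeated runs of the README snippet should produce output very close to the sample comment above.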