Update README.md
README.md CHANGED
@@ -93,19 +93,19 @@ import urllib.request
 
 from io import BytesIO
 from PIL import Image
-from transformers import AutoProcessor,
+from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
 
 from olmocr.data.renderpdf import render_pdf_to_base64png
 from olmocr.prompts import build_no_anchoring_v4_yaml_prompt
 
 # Initialize the model
-model =
+model = Qwen2_5_VLForConditionalGeneration.from_pretrained("allenai/olmOCR-7B-1025", torch_dtype=torch.bfloat16).eval()
 processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 
 # Grab a sample PDF
-urllib.request.urlretrieve("https://
+urllib.request.urlretrieve("https://olmocr.allenai.org/papers/olmocr.pdf", "./paper.pdf")
 
 # Render page 1 to an image
 image_base64 = render_pdf_to_base64png("./paper.pdf", 1, target_longest_image_dim=1288)
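The README lines between this hunk and the next (112-137) are unchanged and therefore elided, so the step that turns `image_base64` into the `inputs` dict referenced in the next hunk's header is not shown. A minimal sketch of that bridge, assuming the standard Qwen2.5-VL chat-template flow, an inline data-URL image message, and a zero-argument `build_no_anchoring_v4_yaml_prompt()`; the README's actual code may differ:

```python
import base64
from io import BytesIO
from PIL import Image

# Decode the rendered page back into a PIL image.
main_image = Image.open(BytesIO(base64.b64decode(image_base64)))

# Wrap the prompt text and the page image in a single user message.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": build_no_anchoring_v4_yaml_prompt()},
            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
        ],
    }
]

# Apply the chat template, preprocess text and image together, and move
# the tensors to the device, matching the next hunk's context line
# `inputs = {key: value.to(device) for (key, value) in inputs.items()}`.
text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = processor(text=[text], images=[main_image], padding=True, return_tensors="pt")
inputs = {key: value.to(device) for (key, value) in inputs.items()}
```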
@@ -138,7 +138,7 @@ inputs = {key: value.to(device) for (key, value) in inputs.items()}
 # Generate the output
 output = model.generate(
     **inputs,
-    temperature=0.
+    temperature=0.1,
     max_new_tokens=50,
     num_return_sequences=1,
     do_sample=True,
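README lines 145-151 between this hunk and the next are likewise elided. One plausible shape for that decoding step, a sketch assuming the common transformers pattern of trimming the echoed prompt tokens before the `batch_decode` call shown in the next hunk's header; `prompt_length` and `new_tokens` are illustrative names, not necessarily the README's:

```python
# Keep only the newly generated tokens: `output` echoes the prompt,
# so slice it off using the prompt's token length.
prompt_length = inputs["input_ids"].shape[1]
new_tokens = output[:, prompt_length:]

text_output = processor.tokenizer.batch_decode(
    new_tokens, skip_special_tokens=True
)
```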
@@ -152,7 +152,7 @@ text_output = processor.tokenizer.batch_decode(
 )
 
 print(text_output)
-# ['
+# ['---\nprimary_language: en\nis_rotation_valid: True\nrotation_correction: 0\nis_table: False\nis_diagram: False\n---\nolmOCR: Unlocking Trillions of Tokens in PDFs with Vision Language Models\n\nJake Poz']
 ```
 
 ## License and use
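The updated sample output shows the model returning YAML front matter (primary language, rotation validity and correction, table/diagram flags) ahead of the page text. A small sketch for splitting the two, assuming the `---`-delimited format in the comment above; `parse_page` is a hypothetical helper and PyYAML is an assumed extra dependency, not something this diff introduces:

```python
import yaml  # third-party PyYAML; an assumption made for this sketch

def parse_page(raw: str):
    """Split olmOCR output into its YAML front matter and the page text."""
    if raw.startswith("---"):
        # raw looks like "---\n<yaml>\n---\n<page text>"
        _, front_matter, body = raw.split("---", 2)
        return yaml.safe_load(front_matter), body.lstrip("\n")
    return {}, raw

metadata, page_text = parse_page(text_output[0])
print(metadata["primary_language"])  # 'en' for the sample page above
```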