Update README.md
Browse files
README.md
CHANGED
|
@@ -46,7 +46,7 @@ Pruned-vocabulary version (32k tokens) optimized for European languages, offerin
|
|
| 46 |
|
| 47 |
## Model Overview
|
| 48 |
|
| 49 |
-
**LightOnOCR** combines a
|
| 50 |
It is optimized for document parsing tasks, producing accurate, layout-aware text extraction from high-resolution pages.
|
| 51 |
|
| 52 |
---
|
|
@@ -59,7 +59,7 @@ It is optimized for document parsing tasks, producing accurate, layout-aware tex
|
|
| 59 |
| [LightOnOCR-1B-32k](https://huggingface.co/lightonai/LightOnOCR-0.9B-32k-1025) (32k vocab) | 80.6 | 66.2 | 73.5 | 33.5 | 71.2 | 87.6 | 99.5 | **73.1** |
|
| 60 |
| [LightOnOCR-1B-16k](https://huggingface.co/lightonai/LightOnOCR-0.9B-16k-1025) (16k vocab) | 82.3 | 72.9 | 75.3 | 33.5 | 78.6 | 85.1 | 99.8 | **75.4** |
|
| 61 |
|
| 62 |
-
All benchmarks evaluated using
|
| 63 |
|
| 64 |
---
|
| 65 |
|
|
@@ -75,13 +75,16 @@ uv pip install -U vllm \
|
|
| 75 |
--extra-index-url https://wheels.vllm.ai/nightly \
|
| 76 |
--prerelease=allow
|
| 77 |
|
|
|
|
|
|
|
|
|
|
| 78 |
uv pip install pypdfium2 pillow requests
|
| 79 |
```
|
| 80 |
|
| 81 |
## Start Server
|
| 82 |
|
| 83 |
```bash
|
| 84 |
-
vllm serve lightonai/LightOnOCR-0.9B-
|
| 85 |
--limit-mm-per-prompt '{"image": 1}' \
|
| 86 |
--async-scheduling
|
| 87 |
```
|
|
@@ -95,7 +98,7 @@ import pypdfium2 as pdfium
|
|
| 95 |
import io
|
| 96 |
|
| 97 |
ENDPOINT = "http://localhost:8000/v1/chat/completions"
|
| 98 |
-
MODEL = "lightonai/LightOnOCR-0.9B-
|
| 99 |
|
| 100 |
# Download PDF from arXiv
|
| 101 |
pdf_url = "https://arxiv.org/pdf/2412.13663"
|
|
@@ -104,8 +107,8 @@ pdf_data = requests.get(pdf_url).content
|
|
| 104 |
# Open PDF and convert first page to image
|
| 105 |
pdf = pdfium.PdfDocument(pdf_data)
|
| 106 |
page = pdf[0]
|
| 107 |
-
# Render at
|
| 108 |
-
pil_image = page.render(scale=
|
| 109 |
|
| 110 |
# Convert to base64
|
| 111 |
buffer = io.BytesIO()
|
|
@@ -122,7 +125,7 @@ payload = {
|
|
| 122 |
"image_url": {"url": f"data:image/png;base64,{image_base64}"}
|
| 123 |
}]
|
| 124 |
}],
|
| 125 |
-
"max_tokens":
|
| 126 |
"temperature": 0.2,
|
| 127 |
"top_p": 0.9,
|
| 128 |
}
|
|
|
|
| 46 |
|
| 47 |
## Model Overview
|
| 48 |
|
| 49 |
+
**LightOnOCR** combines a Vision Transformer encoder (Pixtral-based) with a lightweight text decoder (Qwen3-based) distilled from high-quality open VLMs.
|
| 50 |
It is optimized for document parsing tasks, producing accurate, layout-aware text extraction from high-resolution pages.
|
| 51 |
|
| 52 |
---
|
|
|
|
| 59 |
| [LightOnOCR-1B-32k](https://huggingface.co/lightonai/LightOnOCR-0.9B-32k-1025) (32k vocab) | 80.6 | 66.2 | 73.5 | 33.5 | 71.2 | 87.6 | 99.5 | **73.1** |
|
| 60 |
| [LightOnOCR-1B-16k](https://huggingface.co/lightonai/LightOnOCR-0.9B-16k-1025) (16k vocab) | 82.3 | 72.9 | 75.3 | 33.5 | 78.6 | 85.1 | 99.8 | **75.4** |
|
| 61 |
|
| 62 |
+
All benchmarks were evaluated using **vLLM**.
|
| 63 |
|
| 64 |
---
|
| 65 |
|
|
|
|
| 75 |
--extra-index-url https://wheels.vllm.ai/nightly \
|
| 76 |
--prerelease=allow
|
| 77 |
|
| 78 |
+
# If the install above fails, try also installing the triton-kernels package:
|
| 79 |
+
# uv pip install 'triton-kernels @ git+https://github.com/triton-lang/triton.git@v3.5.0#subdirectory=python/triton_kernels'
|
| 80 |
+
|
| 81 |
uv pip install pypdfium2 pillow requests
|
| 82 |
```
|
| 83 |
|
| 84 |
## Start Server
|
| 85 |
|
| 86 |
```bash
|
| 87 |
+
vllm serve lightonai/LightOnOCR-0.9B-16k-1025 \
|
| 88 |
--limit-mm-per-prompt '{"image": 1}' \
|
| 89 |
--async-scheduling
|
| 90 |
```
|
|
|
|
| 98 |
import io
|
| 99 |
|
| 100 |
ENDPOINT = "http://localhost:8000/v1/chat/completions"
|
| 101 |
+
MODEL = "lightonai/LightOnOCR-0.9B-16k-1025"
|
| 102 |
|
| 103 |
# Download PDF from arXiv
|
| 104 |
pdf_url = "https://arxiv.org/pdf/2412.13663"
|
|
|
|
| 107 |
# Open PDF and convert first page to image
|
| 108 |
pdf = pdfium.PdfDocument(pdf_data)
|
| 109 |
page = pdf[0]
|
| 110 |
+
# Render at ~200 DPI (PDF units are 1/72 inch, so scale = 200/72 ≈ 2.78; 2.77 is used here)
|
| 111 |
+
pil_image = page.render(scale=2.77).to_pil()
|
| 112 |
|
| 113 |
# Convert to base64
|
| 114 |
buffer = io.BytesIO()
|
|
|
|
| 125 |
"image_url": {"url": f"data:image/png;base64,{image_base64}"}
|
| 126 |
}]
|
| 127 |
}],
|
| 128 |
+
"max_tokens": 4096,
|
| 129 |
"temperature": 0.2,
|
| 130 |
"top_p": 0.9,
|
| 131 |
}
|