lightonai
/

LightOnOCR-0.9B-32k-1025

@@ -46,7 +46,7 @@ Pruned-vocabulary version (32k tokens) optimized for European languages, offerin
 ## Model Overview
-**LightOnOCR** combines a high-performance Vision Transformer encoder with a lightweight text decoder distilled from high-quality open VLMs.
 It is optimized for document parsing tasks, producing accurate, layout-aware text extraction from high-resolution pages.
 ---
@@ -59,7 +59,7 @@ It is optimized for document parsing tasks, producing accurate, layout-aware tex
 | [LightOnOCR-1B-32k](https://huggingface.co/lightonai/LightOnOCR-0.9B-32k-1025) (32k vocab) | 80.6 | 66.2 | 73.5 | 33.5 | 71.2 | 87.6 | 99.5 | **73.1** |
 | [LightOnOCR-1B-16k](https://huggingface.co/lightonai/LightOnOCR-0.9B-16k-1025) (16k vocab) | 82.3 | 72.9 | 75.3 | 33.5 | 78.6 | 85.1 | 99.8 | **75.4** |
-All benchmarks evaluated using standardized LightOnOCR inference via **vLLM** on the LightOn internal OCR test suite (2025-10).
 ---
@@ -75,13 +75,16 @@ uv pip install -U vllm \
     --extra-index-url https://wheels.vllm.ai/nightly \
     --prerelease=allow
 uv pip install pypdfium2 pillow requests
 ```
 ## Start Server
 ```bash
-vllm serve lightonai/LightOnOCR-0.9B-32k-1025 \
     --limit-mm-per-prompt '{"image": 1}' \
     --async-scheduling
 ```
@@ -95,7 +98,7 @@ import pypdfium2 as pdfium
 import io
 ENDPOINT = "http://localhost:8000/v1/chat/completions"
-MODEL = "lightonai/LightOnOCR-0.9B-32k-1025"
 # Download PDF from arXiv
 pdf_url = "https://arxiv.org/pdf/2412.13663"
@@ -104,8 +107,8 @@ pdf_data = requests.get(pdf_url).content
 # Open PDF and convert first page to image
 pdf = pdfium.PdfDocument(pdf_data)
 page = pdf[0]
-# Render at 300 DPI (scale factor = 300/72 ≈ 4.17)
-pil_image = page.render(scale=4.17).to_pil()
 # Convert to base64
 buffer = io.BytesIO()
@@ -122,7 +125,7 @@ payload = {
             "image_url": {"url": f"data:image/png;base64,{image_base64}"}
         }]
     }],
-    "max_tokens": 6500,
     "temperature": 0.2,
     "top_p": 0.9,
 }

 ## Model Overview
+**LightOnOCR** combines a Vision Transformer encoder (Pixtral-based) with a lightweight text decoder (Qwen3-based) distilled from high-quality open VLMs.
 It is optimized for document parsing tasks, producing accurate, layout-aware text extraction from high-resolution pages.
 ---
 | [LightOnOCR-1B-32k](https://huggingface.co/lightonai/LightOnOCR-0.9B-32k-1025) (32k vocab) | 80.6 | 66.2 | 73.5 | 33.5 | 71.2 | 87.6 | 99.5 | **73.1** |
 | [LightOnOCR-1B-16k](https://huggingface.co/lightonai/LightOnOCR-0.9B-16k-1025) (16k vocab) | 82.3 | 72.9 | 75.3 | 33.5 | 78.6 | 85.1 | 99.8 | **75.4** |
+All benchmarks were evaluated using **vLLM**.
 ---
     --extra-index-url https://wheels.vllm.ai/nightly \
     --prerelease=allow
+# If this fails, try also installing the triton-kernels package:
+# uv pip install 'triton-kernels @ git+https://github.com/triton-lang/triton.git@v3.5.0#subdirectory=python/triton_kernels'
 uv pip install pypdfium2 pillow requests
 ```
 ## Start Server
 ```bash
+vllm serve lightonai/LightOnOCR-0.9B-16k-1025 \
     --limit-mm-per-prompt '{"image": 1}' \
     --async-scheduling
 ```
 import io
 ENDPOINT = "http://localhost:8000/v1/chat/completions"
+MODEL = "lightonai/LightOnOCR-0.9B-16k-1025"
 # Download PDF from arXiv
 pdf_url = "https://arxiv.org/pdf/2412.13663"
 # Open PDF and convert first page to image
 pdf = pdfium.PdfDocument(pdf_data)
 page = pdf[0]
+# Render at 200 DPI (scale factor = 200/72 ≈ 2.77)
+pil_image = page.render(scale=2.77).to_pil()
 # Convert to base64
 buffer = io.BytesIO()
             "image_url": {"url": f"data:image/png;base64,{image_base64}"}
         }]
     }],
+    "max_tokens": 4096,
     "temperature": 0.2,
     "top_p": 0.9,
 }