Spaces:

IFMedTechdemo
/

Multi-Model-OCR

Running on Zero

App Files Community

IFMedTechdemo committed on Oct 27

Commit

acb34dc

verified ·

1 Parent(s): d6f6041

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -10

app.py CHANGED Viewed

@@ -11,7 +11,7 @@ from transformers import (
     AutoModel,
     AutoModelForCausalLM,
     AutoTokenizer,
-    Qwen3VLForConditionalGeneration,
     Qwen2_5_VLForConditionalGeneration,
     TextIteratorStreamer
 )
@@ -21,14 +21,14 @@ import time
 # Device setup
 device = "cuda" if torch.cuda.is_available() else "cpu"
-# Load Chandra-OCR
 MODEL_ID_V = "datalab-to/chandra"
 processor_v = AutoProcessor.from_pretrained(MODEL_ID_V, trust_remote_code=True)
-model_v = Qwen3VLForConditionalGeneration.from_pretrained(
     MODEL_ID_V,
     trust_remote_code=True,
     torch_dtype=torch.float16,
-    attn_implementation="sdpa"  # Use PyTorch's native scaled dot product attention
 ).to(device).eval()
 # Load Nanonets-OCR2-3B
@@ -38,15 +38,15 @@ model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_X,
     trust_remote_code=True,
     torch_dtype=torch.float16,
-    attn_implementation="sdpa"  # Use PyTorch's native attention
 ).to(device).eval()
-# Load Dots.OCR - REMOVE flash_attention_2 parameter
 MODEL_PATH_D = "strangervisionhf/dots.ocr-base-fix"
 processor_d = AutoProcessor.from_pretrained(MODEL_PATH_D, trust_remote_code=True)
 model_d = AutoModelForCausalLM.from_pretrained(
     MODEL_PATH_D,
-    attn_implementation="sdpa",  # Changed from flash_attention_2
     torch_dtype=torch.bfloat16,
     device_map="auto",
     trust_remote_code=True
@@ -59,15 +59,15 @@ model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_M,
     trust_remote_code=True,
     torch_dtype=torch.bfloat16,
-    attn_implementation="sdpa"  # Use PyTorch's native attention
 ).to(device).eval()
-# Load DeepSeek-OCR - REMOVE flash_attention_2 parameter
 MODEL_ID_DS = "deepseek-ai/DeepSeek-OCR"
 tokenizer_ds = AutoTokenizer.from_pretrained(MODEL_ID_DS, trust_remote_code=True)
 model_ds = AutoModel.from_pretrained(
     MODEL_ID_DS,
-    attn_implementation="sdpa",  # Changed from flash_attention_2
     trust_remote_code=True,
     use_safetensors=True
 ).eval().to(device).to(torch.bfloat16)

     AutoModel,
     AutoModelForCausalLM,
     AutoTokenizer,
+    Qwen2VLForConditionalGeneration,  # Changed from Qwen3VL
     Qwen2_5_VLForConditionalGeneration,
     TextIteratorStreamer
 )
 # Device setup
 device = "cuda" if torch.cuda.is_available() else "cpu"
+# Load Chandra-OCR (uses Qwen2.5-VL architecture)
 MODEL_ID_V = "datalab-to/chandra"
 processor_v = AutoProcessor.from_pretrained(MODEL_ID_V, trust_remote_code=True)
+model_v = Qwen2_5_VLForConditionalGeneration.from_pretrained(  # Changed to Qwen2_5
     MODEL_ID_V,
     trust_remote_code=True,
     torch_dtype=torch.float16,
+    attn_implementation="sdpa"
 ).to(device).eval()
 # Load Nanonets-OCR2-3B
     MODEL_ID_X,
     trust_remote_code=True,
     torch_dtype=torch.float16,
+    attn_implementation="sdpa"
 ).to(device).eval()
+# Load Dots.OCR
 MODEL_PATH_D = "strangervisionhf/dots.ocr-base-fix"
 processor_d = AutoProcessor.from_pretrained(MODEL_PATH_D, trust_remote_code=True)
 model_d = AutoModelForCausalLM.from_pretrained(
     MODEL_PATH_D,
+    attn_implementation="sdpa",
     torch_dtype=torch.bfloat16,
     device_map="auto",
     trust_remote_code=True
     MODEL_ID_M,
     trust_remote_code=True,
     torch_dtype=torch.bfloat16,
+    attn_implementation="sdpa"
 ).to(device).eval()
+# Load DeepSeek-OCR
 MODEL_ID_DS = "deepseek-ai/DeepSeek-OCR"
 tokenizer_ds = AutoTokenizer.from_pretrained(MODEL_ID_DS, trust_remote_code=True)
 model_ds = AutoModel.from_pretrained(
     MODEL_ID_DS,
+    attn_implementation="sdpa",
     trust_remote_code=True,
     use_safetensors=True
 ).eval().to(device).to(torch.bfloat16)