snake11235 committed on
Commit
9e38f34
·
1 Parent(s): a8f90b0

feat: refactor model configuration to use unified MODELS_MAP with backend routing

Browse files

Rename OPENAI_PRICING to MODELS_MAP and add backend field to each model configuration for dynamic backend selection. Update model routing logic to use backend field instead of hardcoded model name checks.

- Rename OPENAI_PRICING to MODELS_MAP in common.py
- Add "backend" field to all model configurations (openai/gemini/olmocr)
- Update process_document() to route based on MODELS_MAP[model_choice]["backend"]
- Update model selector choices in app.py to list all MODELS_MAP keys

Files changed (4) hide show
  1. app.py +6 -6
  2. common.py +35 -35
  3. olm_ocr.py +2 -2
  4. openai_backend.py +2 -2
app.py CHANGED
@@ -14,7 +14,7 @@ from PIL import Image
14
  from olmocr.data.renderpdf import render_pdf_to_base64png
15
 
16
  from openai_backend import _run_openai_vision
17
- from common import OPENAI_PRICING, MODEL_GEMINI, MODEL_OLMOCR
18
  from logging_helper import log as _log, log_debug as _log_debug
19
  from olm_ocr import _run_olmocr
20
 
@@ -118,12 +118,12 @@ def process_document(file_obj, model_choice: str, prompt: str):
118
  "You are an OCR-to-CSV assistant. Read the table or structured text in the image and output a valid "
119
  "CSV representation. Use commas as separators and include a header row if appropriate."
120
  )
121
-
122
- if model_choice in OPENAI_PRICING:
123
  csv_text = _run_openai_vision(image, prompt, model_choice)
124
- elif model_choice == MODEL_GEMINI:
125
  csv_text = _run_gemini_vision(image, prompt)
126
- elif model_choice == MODEL_OLMOCR:
127
  csv_text = _run_olmocr(image, prompt)
128
  else:
129
  csv_text = f"Unknown model choice: {model_choice}"
@@ -148,7 +148,7 @@ def build_interface() -> gr.Blocks:
148
 
149
  model_selector = gr.Dropdown(
150
  label="LLM backend",
151
- choices=list(OPENAI_PRICING.keys()) + [MODEL_GEMINI, MODEL_OLMOCR],
152
  value=MODEL_OLMOCR,
153
  )
154
 
 
14
  from olmocr.data.renderpdf import render_pdf_to_base64png
15
 
16
  from openai_backend import _run_openai_vision
17
+ from common import MODELS_MAP, MODEL_GEMINI, MODEL_OLMOCR
18
  from logging_helper import log as _log, log_debug as _log_debug
19
  from olm_ocr import _run_olmocr
20
 
 
118
  "You are an OCR-to-CSV assistant. Read the table or structured text in the image and output a valid "
119
  "CSV representation. Use commas as separators and include a header row if appropriate."
120
  )
121
+ _log_debug(f"Using model: {model_choice}")
122
+ if MODELS_MAP[model_choice]["backend"] == "openai":
123
  csv_text = _run_openai_vision(image, prompt, model_choice)
124
+ elif MODELS_MAP[model_choice]["backend"] == "gemini":
125
  csv_text = _run_gemini_vision(image, prompt)
126
+ elif MODELS_MAP[model_choice]["backend"] == "olmocr":
127
  csv_text = _run_olmocr(image, prompt)
128
  else:
129
  csv_text = f"Unknown model choice: {model_choice}"
 
148
 
149
  model_selector = gr.Dropdown(
150
  label="LLM backend",
151
+ choices=list(MODELS_MAP.keys()),
152
  value=MODEL_OLMOCR,
153
  )
154
 
common.py CHANGED
@@ -2,52 +2,52 @@ MODEL_GEMINI = "Gemini 3 Pro"
2
  MODEL_OLMOCR = "olmOCR-2-7B-1025-FP8"
3
 
4
 
5
- OPENAI_PRICING = {
6
  # GPT-5.2 family
7
- "gpt-5.2": {"input": 1.75, "output": 14.00},
8
- "gpt-5.2-chat-latest": {"input": 1.75, "output": 14.00},
9
- "gpt-5.2-pro": {"input": 21.00, "output": 168.00},
10
 
11
  # GPT-5.1 / GPT-5 family
12
- "gpt-5.1": {"input": 1.25, "output": 10.00},
13
- "gpt-5": {"input": 1.25, "output": 10.00},
14
- "gpt-5-mini": {"input": 0.25, "output": 2.00},
15
- "gpt-5-nano": {"input": 0.05, "output": 0.40},
16
- "gpt-5.1-chat-latest": {"input": 1.25, "output": 10.00},
17
- "gpt-5-chat-latest": {"input": 1.25, "output": 10.00},
18
- "gpt-5.1-codex-max": {"input": 1.25, "output": 10.00},
19
- "gpt-5.1-codex": {"input": 1.25, "output": 10.00},
20
- "gpt-5-codex": {"input": 1.25, "output": 10.00},
21
- "gpt-5.1-codex-mini": {"input": 0.25, "output": 2.00},
22
- "gpt-5-pro": {"input": 15.00, "output": 120.00},
23
- "gpt-5-search-api": {"input": 1.25, "output": 10.00},
24
 
25
  # GPT-4.1 family
26
- "gpt-4.1": {"input": 2.00, "output": 8.00},
27
- "gpt-4.1-mini": {"input": 0.40, "output": 1.60},
28
- "gpt-4.1-nano": {"input": 0.10, "output": 0.40},
29
 
30
  # GPT-4o family
31
- "gpt-4o": {"input": 2.50, "output": 10.00},
32
- "gpt-4o-2024-05-13": {"input": 5.00, "output": 15.00},
33
- "gpt-4o-mini": {"input": 0.15, "output": 0.60},
34
- "chatgpt-4o-latest": {"input": 5.00, "output": 15.00},
35
 
36
  # GPT-4 Turbo / GPT-4 legacy family (from legacy models table)
37
- "gpt-4-turbo": {"input": 10.00, "output": 30.00},
38
- "gpt-4-turbo-2024-04-09": {"input": 10.00, "output": 30.00},
39
- "gpt-4-0125-preview": {"input": 10.00, "output": 30.00},
40
- "gpt-4-1106-preview": {"input": 10.00, "output": 30.00},
41
- "gpt-4-1106-vision-preview": {"input": 10.00, "output": 30.00},
42
- "gpt-4-0613": {"input": 30.00, "output": 60.00},
43
- "gpt-4-0314": {"input": 30.00, "output": 60.00},
44
- "gpt-4": {"input": 30.00, "output": 60.00},
45
- "gpt-4-32k": {"input": 60.00, "output": 120.00},
46
 
47
  # Default
48
- "default": {"input": 2.50, "output": 10.00},
49
 
50
  # Other backends (mock rates)
51
- MODEL_GEMINI: {"input": 1.00, "output": 1.00},
52
- MODEL_OLMOCR: {"input": 1.35, "output": 0.30},
53
  }
 
2
  MODEL_OLMOCR = "olmOCR-2-7B-1025-FP8"
3
 
4
 
5
+ MODELS_MAP = {
6
  # GPT-5.2 family
7
+ "gpt-5.2": {"input": 1.75, "output": 14.00, "backend": "openai"},
8
+ "gpt-5.2-chat-latest": {"input": 1.75, "output": 14.00, "backend": "openai"},
9
+ "gpt-5.2-pro": {"input": 21.00, "output": 168.00, "backend": "openai"},
10
 
11
  # GPT-5.1 / GPT-5 family
12
+ "gpt-5.1": {"input": 1.25, "output": 10.00, "backend": "openai"},
13
+ "gpt-5": {"input": 1.25, "output": 10.00, "backend": "openai"},
14
+ "gpt-5-mini": {"input": 0.25, "output": 2.00, "backend": "openai"},
15
+ "gpt-5-nano": {"input": 0.05, "output": 0.40, "backend": "openai"},
16
+ "gpt-5.1-chat-latest": {"input": 1.25, "output": 10.00, "backend": "openai"},
17
+ "gpt-5-chat-latest": {"input": 1.25, "output": 10.00, "backend": "openai"},
18
+ "gpt-5.1-codex-max": {"input": 1.25, "output": 10.00, "backend": "openai"},
19
+ "gpt-5.1-codex": {"input": 1.25, "output": 10.00, "backend": "openai"},
20
+ "gpt-5-codex": {"input": 1.25, "output": 10.00, "backend": "openai"},
21
+ "gpt-5.1-codex-mini": {"input": 0.25, "output": 2.00, "backend": "openai"},
22
+ "gpt-5-pro": {"input": 15.00, "output": 120.00, "backend": "openai"},
23
+ "gpt-5-search-api": {"input": 1.25, "output": 10.00, "backend": "openai"},
24
 
25
  # GPT-4.1 family
26
+ "gpt-4.1": {"input": 2.00, "output": 8.00, "backend": "openai"},
27
+ "gpt-4.1-mini": {"input": 0.40, "output": 1.60, "backend": "openai"},
28
+ "gpt-4.1-nano": {"input": 0.10, "output": 0.40, "backend": "openai"},
29
 
30
  # GPT-4o family
31
+ "gpt-4o": {"input": 2.50, "output": 10.00, "backend": "openai"},
32
+ "gpt-4o-2024-05-13": {"input": 5.00, "output": 15.00, "backend": "openai"},
33
+ "gpt-4o-mini": {"input": 0.15, "output": 0.60, "backend": "openai"},
34
+ "chatgpt-4o-latest": {"input": 5.00, "output": 15.00, "backend": "openai"},
35
 
36
  # GPT-4 Turbo / GPT-4 legacy family (from legacy models table)
37
+ "gpt-4-turbo": {"input": 10.00, "output": 30.00, "backend": "openai"},
38
+ "gpt-4-turbo-2024-04-09": {"input": 10.00, "output": 30.00, "backend": "openai"},
39
+ "gpt-4-0125-preview": {"input": 10.00, "output": 30.00, "backend": "openai"},
40
+ "gpt-4-1106-preview": {"input": 10.00, "output": 30.00, "backend": "openai"},
41
+ "gpt-4-1106-vision-preview": {"input": 10.00, "output": 30.00, "backend": "openai"},
42
+ "gpt-4-0613": {"input": 30.00, "output": 60.00, "backend": "openai"},
43
+ "gpt-4-0314": {"input": 30.00, "output": 60.00, "backend": "openai"},
44
+ "gpt-4": {"input": 30.00, "output": 60.00, "backend": "openai"},
45
+ "gpt-4-32k": {"input": 60.00, "output": 120.00, "backend": "openai"},
46
 
47
  # Default
48
+ "default": {"input": 2.50, "output": 10.00, "backend": "openai"},
49
 
50
  # Other backends (mock rates)
51
+ MODEL_GEMINI: {"input": 1.00, "output": 1.00, "backend": "gemini"},
52
+ MODEL_OLMOCR: {"input": 1.35, "output": 0.30, "backend": "olmocr"},
53
  }
olm_ocr.py CHANGED
@@ -5,7 +5,7 @@ from PIL import Image
5
  from huggingface_hub import InferenceClient
6
  from image_utils import _pil_image_to_base64_jpeg
7
  from logging_helper import _log_model_response
8
- from common import OPENAI_PRICING
9
 
10
 
11
  MODEL_ID = "allenai/olmOCR-2-7B-1025-FP8"
@@ -56,7 +56,7 @@ def _run_olmocr(image: Image.Image, prompt: str) -> str:
56
  content=content,
57
  duration=duration,
58
  usage=completion.usage,
59
- pricing=OPENAI_PRICING,
60
  )
61
 
62
  return content
 
5
  from huggingface_hub import InferenceClient
6
  from image_utils import _pil_image_to_base64_jpeg
7
  from logging_helper import _log_model_response
8
+ from common import MODELS_MAP
9
 
10
 
11
  MODEL_ID = "allenai/olmOCR-2-7B-1025-FP8"
 
56
  content=content,
57
  duration=duration,
58
  usage=completion.usage,
59
+ pricing=MODELS_MAP,
60
  )
61
 
62
  return content
openai_backend.py CHANGED
@@ -6,7 +6,7 @@ from typing import Optional
6
  from PIL import Image
7
  from logging_helper import log as _log, log_debug as _log_debug, _log_model_response
8
  from image_utils import _pil_image_to_base64_jpeg
9
- from common import OPENAI_PRICING
10
 
11
  try:
12
  from openai import OpenAI
@@ -56,7 +56,7 @@ def _run_openai_vision(image: Image.Image, prompt: str, model_name: str) -> str:
56
  content=content,
57
  duration=duration,
58
  usage=response.usage,
59
- pricing=OPENAI_PRICING,
60
  )
61
 
62
  return content
 
6
  from PIL import Image
7
  from logging_helper import log as _log, log_debug as _log_debug, _log_model_response
8
  from image_utils import _pil_image_to_base64_jpeg
9
+ from common import MODELS_MAP
10
 
11
  try:
12
  from openai import OpenAI
 
56
  content=content,
57
  duration=duration,
58
  usage=response.usage,
59
+ pricing=MODELS_MAP,
60
  )
61
 
62
  return content