snake11235 committed on
Commit
9e38f34
·
1 Parent(s): a8f90b0

feat: refactor model configuration to use unified MODELS_MAP with backend routing

Browse files

Rename OPENAI_PRICING to MODELS_MAP and add backend field to each model configuration for dynamic backend selection. Update model routing logic to use backend field instead of hardcoded model name checks.

- Rename OPENAI_PRICING to MODELS_MAP in common.py
- Add "backend" field to all model configurations (openai/gemini/olmocr)
- Update process_document() to route based on MODELS_MAP[model_choice]["backend"]
- Update model selector choices in app.py to list all MODELS_MAP keys

Files changed (4) hide show
  1. app.py +6 -6
  2. common.py +35 -35
  3. olm_ocr.py +2 -2
  4. openai_backend.py +2 -2
app.py CHANGED
@@ -14,7 +14,7 @@ from PIL import Image
14
  from olmocr.data.renderpdf import render_pdf_to_base64png
15
 
16
  from openai_backend import _run_openai_vision
17
- from common import OPENAI_PRICING, MODEL_GEMINI, MODEL_OLMOCR
18
  from logging_helper import log as _log, log_debug as _log_debug
19
  from olm_ocr import _run_olmocr
20
 
@@ -118,12 +118,12 @@ def process_document(file_obj, model_choice: str, prompt: str):
118
  "You are an OCR-to-CSV assistant. Read the table or structured text in the image and output a valid "
119
  "CSV representation. Use commas as separators and include a header row if appropriate."
120
  )
121
-
122
- if model_choice in OPENAI_PRICING:
123
  csv_text = _run_openai_vision(image, prompt, model_choice)
124
- elif model_choice == MODEL_GEMINI:
125
  csv_text = _run_gemini_vision(image, prompt)
126
- elif model_choice == MODEL_OLMOCR:
127
  csv_text = _run_olmocr(image, prompt)
128
  else:
129
  csv_text = f"Unknown model choice: {model_choice}"
@@ -148,7 +148,7 @@ def build_interface() -> gr.Blocks:
148
 
149
  model_selector = gr.Dropdown(
150
  label="LLM backend",
151
- choices=list(OPENAI_PRICING.keys()) + [MODEL_GEMINI, MODEL_OLMOCR],
152
  value=MODEL_OLMOCR,
153
  )
154
 
 
14
  from olmocr.data.renderpdf import render_pdf_to_base64png
15
 
16
  from openai_backend import _run_openai_vision
17
+ from common import MODELS_MAP, MODEL_GEMINI, MODEL_OLMOCR
18
  from logging_helper import log as _log, log_debug as _log_debug
19
  from olm_ocr import _run_olmocr
20
 
 
118
  "You are an OCR-to-CSV assistant. Read the table or structured text in the image and output a valid "
119
  "CSV representation. Use commas as separators and include a header row if appropriate."
120
  )
121
+ _log_debug(f"Using model: {model_choice}")
122
+ if MODELS_MAP[model_choice]["backend"] == "openai":
123
  csv_text = _run_openai_vision(image, prompt, model_choice)
124
+ elif MODELS_MAP[model_choice]["backend"] == "gemini":
125
  csv_text = _run_gemini_vision(image, prompt)
126
+ elif MODELS_MAP[model_choice]["backend"] == "olmocr":
127
  csv_text = _run_olmocr(image, prompt)
128
  else:
129
  csv_text = f"Unknown model choice: {model_choice}"
 
148
 
149
  model_selector = gr.Dropdown(
150
  label="LLM backend",
151
+ choices=list(MODELS_MAP.keys()),
152
  value=MODEL_OLMOCR,
153
  )
154
 
common.py CHANGED
@@ -2,52 +2,52 @@ MODEL_GEMINI = "Gemini 3 Pro"
2
  MODEL_OLMOCR = "olmOCR-2-7B-1025-FP8"
3
 
4
 
5
- OPENAI_PRICING = {
6
  # GPT-5.2 family
7
- "gpt-5.2": {"input": 1.75, "output": 14.00},
8
- "gpt-5.2-chat-latest": {"input": 1.75, "output": 14.00},
9
- "gpt-5.2-pro": {"input": 21.00, "output": 168.00},
10
 
11
  # GPT-5.1 / GPT-5 family
12
- "gpt-5.1": {"input": 1.25, "output": 10.00},
13
- "gpt-5": {"input": 1.25, "output": 10.00},
14
- "gpt-5-mini": {"input": 0.25, "output": 2.00},
15
- "gpt-5-nano": {"input": 0.05, "output": 0.40},
16
- "gpt-5.1-chat-latest": {"input": 1.25, "output": 10.00},
17
- "gpt-5-chat-latest": {"input": 1.25, "output": 10.00},
18
- "gpt-5.1-codex-max": {"input": 1.25, "output": 10.00},
19
- "gpt-5.1-codex": {"input": 1.25, "output": 10.00},
20
- "gpt-5-codex": {"input": 1.25, "output": 10.00},
21
- "gpt-5.1-codex-mini": {"input": 0.25, "output": 2.00},
22
- "gpt-5-pro": {"input": 15.00, "output": 120.00},
23
- "gpt-5-search-api": {"input": 1.25, "output": 10.00},
24
 
25
  # GPT-4.1 family
26
- "gpt-4.1": {"input": 2.00, "output": 8.00},
27
- "gpt-4.1-mini": {"input": 0.40, "output": 1.60},
28
- "gpt-4.1-nano": {"input": 0.10, "output": 0.40},
29
 
30
  # GPT-4o family
31
- "gpt-4o": {"input": 2.50, "output": 10.00},
32
- "gpt-4o-2024-05-13": {"input": 5.00, "output": 15.00},
33
- "gpt-4o-mini": {"input": 0.15, "output": 0.60},
34
- "chatgpt-4o-latest": {"input": 5.00, "output": 15.00},
35
 
36
  # GPT-4 Turbo / GPT-4 legacy family (from legacy models table)
37
- "gpt-4-turbo": {"input": 10.00, "output": 30.00},
38
- "gpt-4-turbo-2024-04-09": {"input": 10.00, "output": 30.00},
39
- "gpt-4-0125-preview": {"input": 10.00, "output": 30.00},
40
- "gpt-4-1106-preview": {"input": 10.00, "output": 30.00},
41
- "gpt-4-1106-vision-preview": {"input": 10.00, "output": 30.00},
42
- "gpt-4-0613": {"input": 30.00, "output": 60.00},
43
- "gpt-4-0314": {"input": 30.00, "output": 60.00},
44
- "gpt-4": {"input": 30.00, "output": 60.00},
45
- "gpt-4-32k": {"input": 60.00, "output": 120.00},
46
 
47
  # Default
48
- "default": {"input": 2.50, "output": 10.00},
49
 
50
  # Other backends (mock rates)
51
- MODEL_GEMINI: {"input": 1.00, "output": 1.00},
52
- MODEL_OLMOCR: {"input": 1.35, "output": 0.30},
53
  }
 
2
  MODEL_OLMOCR = "olmOCR-2-7B-1025-FP8"
3
 
4
 
5
+ MODELS_MAP = {
6
  # GPT-5.2 family
7
+ "gpt-5.2": {"input": 1.75, "output": 14.00, "backend": "openai"},
8
+ "gpt-5.2-chat-latest": {"input": 1.75, "output": 14.00, "backend": "openai"},
9
+ "gpt-5.2-pro": {"input": 21.00, "output": 168.00, "backend": "openai"},
10
 
11
  # GPT-5.1 / GPT-5 family
12
+ "gpt-5.1": {"input": 1.25, "output": 10.00, "backend": "openai"},
13
+ "gpt-5": {"input": 1.25, "output": 10.00, "backend": "openai"},
14
+ "gpt-5-mini": {"input": 0.25, "output": 2.00, "backend": "openai"},
15
+ "gpt-5-nano": {"input": 0.05, "output": 0.40, "backend": "openai"},
16
+ "gpt-5.1-chat-latest": {"input": 1.25, "output": 10.00, "backend": "openai"},
17
+ "gpt-5-chat-latest": {"input": 1.25, "output": 10.00, "backend": "openai"},
18
+ "gpt-5.1-codex-max": {"input": 1.25, "output": 10.00, "backend": "openai"},
19
+ "gpt-5.1-codex": {"input": 1.25, "output": 10.00, "backend": "openai"},
20
+ "gpt-5-codex": {"input": 1.25, "output": 10.00, "backend": "openai"},
21
+ "gpt-5.1-codex-mini": {"input": 0.25, "output": 2.00, "backend": "openai"},
22
+ "gpt-5-pro": {"input": 15.00, "output": 120.00, "backend": "openai"},
23
+ "gpt-5-search-api": {"input": 1.25, "output": 10.00, "backend": "openai"},
24
 
25
  # GPT-4.1 family
26
+ "gpt-4.1": {"input": 2.00, "output": 8.00, "backend": "openai"},
27
+ "gpt-4.1-mini": {"input": 0.40, "output": 1.60, "backend": "openai"},
28
+ "gpt-4.1-nano": {"input": 0.10, "output": 0.40, "backend": "openai"},
29
 
30
  # GPT-4o family
31
+ "gpt-4o": {"input": 2.50, "output": 10.00, "backend": "openai"},
32
+ "gpt-4o-2024-05-13": {"input": 5.00, "output": 15.00, "backend": "openai"},
33
+ "gpt-4o-mini": {"input": 0.15, "output": 0.60, "backend": "openai"},
34
+ "chatgpt-4o-latest": {"input": 5.00, "output": 15.00, "backend": "openai"},
35
 
36
  # GPT-4 Turbo / GPT-4 legacy family (from legacy models table)
37
+ "gpt-4-turbo": {"input": 10.00, "output": 30.00, "backend": "openai"},
38
+ "gpt-4-turbo-2024-04-09": {"input": 10.00, "output": 30.00, "backend": "openai"},
39
+ "gpt-4-0125-preview": {"input": 10.00, "output": 30.00, "backend": "openai"},
40
+ "gpt-4-1106-preview": {"input": 10.00, "output": 30.00, "backend": "openai"},
41
+ "gpt-4-1106-vision-preview": {"input": 10.00, "output": 30.00, "backend": "openai"},
42
+ "gpt-4-0613": {"input": 30.00, "output": 60.00, "backend": "openai"},
43
+ "gpt-4-0314": {"input": 30.00, "output": 60.00, "backend": "openai"},
44
+ "gpt-4": {"input": 30.00, "output": 60.00, "backend": "openai"},
45
+ "gpt-4-32k": {"input": 60.00, "output": 120.00, "backend": "openai"},
46
 
47
  # Default
48
+ "default": {"input": 2.50, "output": 10.00, "backend": "openai"},
49
 
50
  # Other backends (mock rates)
51
+ MODEL_GEMINI: {"input": 1.00, "output": 1.00, "backend": "gemini"},
52
+ MODEL_OLMOCR: {"input": 1.35, "output": 0.30, "backend": "olmocr"},
53
  }
olm_ocr.py CHANGED
@@ -5,7 +5,7 @@ from PIL import Image
5
  from huggingface_hub import InferenceClient
6
  from image_utils import _pil_image_to_base64_jpeg
7
  from logging_helper import _log_model_response
8
- from common import OPENAI_PRICING
9
 
10
 
11
  MODEL_ID = "allenai/olmOCR-2-7B-1025-FP8"
@@ -56,7 +56,7 @@ def _run_olmocr(image: Image.Image, prompt: str) -> str:
56
  content=content,
57
  duration=duration,
58
  usage=completion.usage,
59
- pricing=OPENAI_PRICING,
60
  )
61
 
62
  return content
 
5
  from huggingface_hub import InferenceClient
6
  from image_utils import _pil_image_to_base64_jpeg
7
  from logging_helper import _log_model_response
8
+ from common import MODELS_MAP
9
 
10
 
11
  MODEL_ID = "allenai/olmOCR-2-7B-1025-FP8"
 
56
  content=content,
57
  duration=duration,
58
  usage=completion.usage,
59
+ pricing=MODELS_MAP,
60
  )
61
 
62
  return content
openai_backend.py CHANGED
@@ -6,7 +6,7 @@ from typing import Optional
6
  from PIL import Image
7
  from logging_helper import log as _log, log_debug as _log_debug, _log_model_response
8
  from image_utils import _pil_image_to_base64_jpeg
9
- from common import OPENAI_PRICING
10
 
11
  try:
12
  from openai import OpenAI
@@ -56,7 +56,7 @@ def _run_openai_vision(image: Image.Image, prompt: str, model_name: str) -> str:
56
  content=content,
57
  duration=duration,
58
  usage=response.usage,
59
- pricing=OPENAI_PRICING,
60
  )
61
 
62
  return content
 
6
  from PIL import Image
7
  from logging_helper import log as _log, log_debug as _log_debug, _log_model_response
8
  from image_utils import _pil_image_to_base64_jpeg
9
+ from common import MODELS_MAP
10
 
11
  try:
12
  from openai import OpenAI
 
56
  content=content,
57
  duration=duration,
58
  usage=response.usage,
59
+ pricing=MODELS_MAP,
60
  )
61
 
62
  return content