ranggafermata committed on
Commit
4913ddb
·
verified ·
1 Parent(s): e8c4f88

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -48
app.py CHANGED
@@ -142,33 +142,6 @@ model_x = Qwen2VLForConditionalGeneration.from_pretrained(
142
  torch_dtype=torch.float16
143
  ).to(device).eval()
144
 
145
- # Load Aya-Vision-8b
146
- MODEL_ID_A = "CohereForAI/aya-vision-8b"
147
- processor_a = AutoProcessor.from_pretrained(MODEL_ID_A, trust_remote_code=True)
148
- model_a = AutoModelForImageTextToText.from_pretrained(
149
- MODEL_ID_A,
150
- trust_remote_code=True,
151
- torch_dtype=torch.float16
152
- ).to(device).eval()
153
-
154
- # Load olmOCR-7B-0725
155
- MODEL_ID_W = "allenai/olmOCR-7B-0725"
156
- processor_w = AutoProcessor.from_pretrained(MODEL_ID_W, trust_remote_code=True)
157
- model_w = Qwen2_5_VLForConditionalGeneration.from_pretrained(
158
- MODEL_ID_W,
159
- trust_remote_code=True,
160
- torch_dtype=torch.float16
161
- ).to(device).eval()
162
-
163
- # Load RolmOCR
164
- MODEL_ID_M = "reducto/RolmOCR"
165
- processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
166
- model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
167
- MODEL_ID_M,
168
- trust_remote_code=True,
169
- torch_dtype=torch.float16
170
- ).to(device).eval()
171
-
172
 
173
  @spaces.GPU
174
  def generate_image(model_name: str, text: str, image: Image.Image,
@@ -178,21 +151,12 @@ def generate_image(model_name: str, text: str, image: Image.Image,
178
  Generates responses using the selected model for image input.
179
  Yields raw text and Markdown-formatted text.
180
  """
181
- if model_name == "RolmOCR-7B":
182
- processor = processor_m
183
- model = model_m
184
- elif model_name == "Qwen2-VL-OCR-2B":
185
  processor = processor_x
186
  model = model_x
187
  elif model_name == "Nanonets-OCR2-3B":
188
  processor = processor_v
189
  model = model_v
190
- elif model_name == "Aya-Vision-8B":
191
- processor = processor_a
192
- model = model_a
193
- elif model_name == "olmOCR-7B-0725":
194
- processor = processor_w
195
- model = model_w
196
  else:
197
  yield "Invalid model selected.", "Invalid model selected."
198
  return
@@ -237,17 +201,9 @@ def generate_image(model_name: str, text: str, image: Image.Image,
237
  yield buffer, buffer
238
 
239
 
240
- # Define examples for image inference
241
- image_examples = [
242
- ["Extract the full page.", "images/ocr.png"],
243
- ["Extract the content.", "images/4.png"],
244
- ["Convert this page to doc [table] precisely for markdown.", "images/0.png"]
245
- ]
246
-
247
-
248
  # Create the Gradio Interface
249
  with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
250
- gr.Markdown("# **Multimodal OCR**", elem_id="main-title")
251
  with gr.Row():
252
  with gr.Column(scale=2):
253
  image_query = gr.Textbox(label="Query Input", placeholder="Enter your query here...")
@@ -273,8 +229,7 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
273
  markdown_output = gr.Markdown(label="(Result.Md)")
274
 
275
  model_choice = gr.Radio(
276
- choices=["Nanonets-OCR2-3B", "olmOCR-7B-0725", "RolmOCR-7B",
277
- "Aya-Vision-8B", "Qwen2-VL-OCR-2B"],
278
  label="Select Model",
279
  value="Nanonets-OCR2-3B"
280
  )
 
142
  torch_dtype=torch.float16
143
  ).to(device).eval()
144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
  @spaces.GPU
147
  def generate_image(model_name: str, text: str, image: Image.Image,
 
151
  Generates responses using the selected model for image input.
152
  Yields raw text and Markdown-formatted text.
153
  """
154
+ if model_name == "Qwen2-VL-OCR-2B":
 
 
 
155
  processor = processor_x
156
  model = model_x
157
  elif model_name == "Nanonets-OCR2-3B":
158
  processor = processor_v
159
  model = model_v
 
 
 
 
 
 
160
  else:
161
  yield "Invalid model selected.", "Invalid model selected."
162
  return
 
201
  yield buffer, buffer
202
 
203
 
 
 
 
 
 
 
 
 
204
  # Create the Gradio Interface
205
  with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
206
+ gr.Markdown("# **Angel's Eye - Copilot**", elem_id="main-title")
207
  with gr.Row():
208
  with gr.Column(scale=2):
209
  image_query = gr.Textbox(label="Query Input", placeholder="Enter your query here...")
 
229
  markdown_output = gr.Markdown(label="(Result.Md)")
230
 
231
  model_choice = gr.Radio(
232
+ choices=["Nanonets-OCR2-3B", "Qwen2-VL-OCR-2B"],
 
233
  label="Select Model",
234
  value="Nanonets-OCR2-3B"
235
  )