prithivMLmods committed on
Commit
e530de7
·
verified ·
1 Parent(s): 13adaf2

update app

Browse files
Files changed (1) hide show
  1. app.py +129 -19
app.py CHANGED
@@ -1,22 +1,100 @@
1
  import os
2
  import sys
 
 
 
3
  import time
4
  from threading import Thread
 
 
5
 
6
  import gradio as gr
7
  import spaces
8
  import torch
 
9
  from PIL import Image
 
10
 
11
  from transformers import (
12
  Qwen2_5_VLForConditionalGeneration,
13
  Qwen3VLForConditionalGeneration,
 
14
  AutoModelForCausalLM,
15
  AutoProcessor,
16
  TextIteratorStreamer,
17
  )
18
 
19
- from theme import steel_blue_theme
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  css = """
22
  #main-title h1 {
@@ -44,6 +122,43 @@ if torch.cuda.is_available():
44
 
45
  print("Using device:", device)
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  # Load Chandra-OCR
48
  MODEL_ID_V = "datalab-to/chandra"
49
  processor_v = AutoProcessor.from_pretrained(MODEL_ID_V, trust_remote_code=True)
@@ -82,7 +197,6 @@ model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
82
  torch_dtype=torch.float16
83
  ).to(device).eval()
84
 
85
-
86
  @spaces.GPU
87
  def generate_image(model_name: str, text: str, image: Image.Image,
88
  max_new_tokens: int, temperature: float, top_p: float,
@@ -146,7 +260,6 @@ def generate_image(model_name: str, text: str, image: Image.Image,
146
  time.sleep(0.01)
147
  yield buffer, buffer
148
 
149
-
150
  image_examples = [
151
  ["OCR the content perfectly.", "examples/3.jpg"],
152
  ["Perform OCR on the image.", "examples/1.jpg"],
@@ -167,30 +280,27 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
167
  )
168
 
169
  with gr.Accordion("Advanced options", open=False):
170
- max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1,
171
- value=DEFAULT_MAX_NEW_TOKENS)
172
  temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7)
173
  top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9)
174
  top_k = gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50)
175
- repetition_penalty = gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05,
176
- value=1.1)
177
 
178
  with gr.Column(scale=3):
179
- gr.Markdown("## Output", elem_id="output-title")
180
- output = gr.Textbox(label="Raw Output Stream", interactive=False, lines=11, show_copy_button=True)
181
- with gr.Accordion("(Result.md)", open=False):
182
- markdown_output = gr.Markdown(label="(Result.Md)")
183
-
184
- model_choice = gr.Radio(
185
- choices=["Nanonets-OCR2-3B", "Chandra-OCR", "Dots.OCR", "olmOCR-2-7B-1025"],
186
- label="Select Model",
187
- value="Nanonets-OCR2-3B"
188
- )
189
 
190
  image_submit.click(
191
  fn=generate_image,
192
- inputs=[model_choice, image_query, image_upload, max_new_tokens, temperature, top_p, top_k,
193
- repetition_penalty],
194
  outputs=[output, markdown_output]
195
  )
196
 
 
1
  import os
2
  import sys
3
+ import random
4
+ import uuid
5
+ import json
6
  import time
7
  from threading import Thread
8
+ from typing import Iterable
9
+ from huggingface_hub import snapshot_download
10
 
11
  import gradio as gr
12
  import spaces
13
  import torch
14
+ import numpy as np
15
  from PIL import Image
16
+ import cv2
17
 
18
  from transformers import (
19
  Qwen2_5_VLForConditionalGeneration,
20
  Qwen3VLForConditionalGeneration,
21
+ AutoModelForImageTextToText,
22
  AutoModelForCausalLM,
23
  AutoProcessor,
24
  TextIteratorStreamer,
25
  )
26
 
27
+ from transformers.image_utils import load_image
28
+ from gradio.themes import Soft
29
+ from gradio.themes.utils import colors, fonts, sizes
30
+
31
+ colors.steel_blue = colors.Color(
32
+ name="steel_blue",
33
+ c50="#EBF3F8",
34
+ c100="#D3E5F0",
35
+ c200="#A8CCE1",
36
+ c300="#7DB3D2",
37
+ c400="#529AC3",
38
+ c500="#4682B4",
39
+ c600="#3E72A0",
40
+ c700="#36638C",
41
+ c800="#2E5378",
42
+ c900="#264364",
43
+ c950="#1E3450",
44
+ )
45
+
46
class SteelBlueTheme(Soft):
    """Gradio theme built on ``Soft`` with gray surfaces and steel-blue accents.

    All constructor arguments are keyword-only and are forwarded unchanged to
    ``Soft.__init__``. After the base theme is initialised, a fixed set of
    theme variables (backgrounds, button fills, slider color, block styling)
    is overridden through ``set``.
    """

    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.gray,
        secondary_hue: colors.Color | str = colors.steel_blue,
        neutral_hue: colors.Color | str = colors.slate,
        text_size: sizes.Size | str = sizes.text_lg,
        font: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Outfit"), "Arial", "sans-serif",
        ),
        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace",
        ),
    ):
        # Hand the caller's choices straight through to the Soft base theme.
        base_options = {
            "primary_hue": primary_hue,
            "secondary_hue": secondary_hue,
            "neutral_hue": neutral_hue,
            "text_size": text_size,
            "font": font,
            "font_mono": font_mono,
        }
        super().__init__(**base_options)

        # Theme-variable overrides, grouped by the part of the UI they style.
        overrides = {
            # Page and panel backgrounds.
            "background_fill_primary": "*primary_50",
            "background_fill_primary_dark": "*primary_900",
            "body_background_fill": "linear-gradient(135deg, *primary_200, *primary_100)",
            "body_background_fill_dark": "linear-gradient(135deg, *primary_900, *primary_800)",
            # Primary buttons.
            "button_primary_text_color": "white",
            "button_primary_text_color_hover": "white",
            "button_primary_background_fill": "linear-gradient(90deg, *secondary_500, *secondary_600)",
            "button_primary_background_fill_hover": "linear-gradient(90deg, *secondary_600, *secondary_700)",
            "button_primary_background_fill_dark": "linear-gradient(90deg, *secondary_600, *secondary_800)",
            "button_primary_background_fill_hover_dark": "linear-gradient(90deg, *secondary_500, *secondary_500)",
            # Secondary buttons.
            "button_secondary_text_color": "black",
            "button_secondary_text_color_hover": "white",
            "button_secondary_background_fill": "linear-gradient(90deg, *primary_300, *primary_300)",
            "button_secondary_background_fill_hover": "linear-gradient(90deg, *primary_400, *primary_400)",
            "button_secondary_background_fill_dark": "linear-gradient(90deg, *primary_500, *primary_600)",
            "button_secondary_background_fill_hover_dark": "linear-gradient(90deg, *primary_500, *primary_500)",
            # Sliders.
            "slider_color": "*secondary_500",
            "slider_color_dark": "*secondary_600",
            # Blocks, shadows and spacing.
            "block_title_text_weight": "600",
            "block_border_width": "3px",
            "block_shadow": "*shadow_drop_lg",
            "button_primary_shadow": "*shadow_drop_lg",
            "button_large_padding": "11px",
            "color_accent_soft": "*primary_100",
            "block_label_background_fill": "*primary_200",
        }
        super().set(**overrides)
96
+
97
+ steel_blue_theme = SteelBlueTheme()
98
 
99
  css = """
100
  #main-title h1 {
 
122
 
123
  print("Using device:", device)
124
 
125
+ # CACHE_PATH = "./model_cache"
126
+ # if not os.path.exists(CACHE_PATH):
127
+ # os.makedirs(CACHE_PATH)
128
+ #
129
+ # model_path_d_local = snapshot_download(
130
+ # repo_id='rednote-hilab/dots.ocr',
131
+ # local_dir=os.path.join(CACHE_PATH, 'dots.ocr'),
132
+ # max_workers=20,
133
+ # local_dir_use_symlinks=False
134
+ # )
135
+ #
136
+ # config_file_path = os.path.join(model_path_d_local, "configuration_dots.py")
137
+ #
138
+ # if os.path.exists(config_file_path):
139
+ # with open(config_file_path, 'r') as f:
140
+ # input_code = f.read()
141
+ #
142
+ # lines = input_code.splitlines()
143
+ # if "class DotsVLProcessor" in input_code and not any("attributes = " in line for line in lines):
144
+ # output_lines = []
145
+ # for line in lines:
146
+ # output_lines.append(line)
147
+ # if line.strip().startswith("class DotsVLProcessor"):
148
+ # output_lines.append(" attributes = [\"image_processor\", \"tokenizer\"]")
149
+ #
150
+ # with open(config_file_path, 'w') as f:
151
+ # f.write('\n'.join(output_lines))
152
+ # print("Patched configuration_dots.py successfully.")
153
+ #
154
+ #sys.path.append(model_path_d_local)
155
+
156
+ MAX_MAX_NEW_TOKENS = 4096
157
+ DEFAULT_MAX_NEW_TOKENS = 2048
158
+ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
159
+
160
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
161
+
162
  # Load Chandra-OCR
163
  MODEL_ID_V = "datalab-to/chandra"
164
  processor_v = AutoProcessor.from_pretrained(MODEL_ID_V, trust_remote_code=True)
 
197
  torch_dtype=torch.float16
198
  ).to(device).eval()
199
 
 
200
  @spaces.GPU
201
  def generate_image(model_name: str, text: str, image: Image.Image,
202
  max_new_tokens: int, temperature: float, top_p: float,
 
260
  time.sleep(0.01)
261
  yield buffer, buffer
262
 
 
263
  image_examples = [
264
  ["OCR the content perfectly.", "examples/3.jpg"],
265
  ["Perform OCR on the image.", "examples/1.jpg"],
 
280
  )
281
 
282
  with gr.Accordion("Advanced options", open=False):
283
+ max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
 
284
  temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7)
285
  top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9)
286
  top_k = gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50)
287
+ repetition_penalty = gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.1)
 
288
 
289
  with gr.Column(scale=3):
290
+ gr.Markdown("## Output", elem_id="output-title")
291
+ output = gr.Textbox(label="Raw Output Stream", interactive=False, lines=11, show_copy_button=True)
292
+ with gr.Accordion("(Result.md)", open=False):
293
+ markdown_output = gr.Markdown(label="(Result.Md)")
294
+
295
+ model_choice = gr.Radio(
296
+ choices=["Nanonets-OCR2-3B", "Chandra-OCR", "Dots.OCR", "olmOCR-2-7B-1025"],
297
+ label="Select Model",
298
+ value="Nanonets-OCR2-3B"
299
+ )
300
 
301
  image_submit.click(
302
  fn=generate_image,
303
+ inputs=[model_choice, image_query, image_upload, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
 
304
  outputs=[output, markdown_output]
305
  )
306