Spaces:

ranggafermata/Angel-Eye-Copilot

Sleeping

App Files Community

ranggafermata committed on Oct 26

Commit

4913ddb

verified ·

1 Parent(s): e8c4f88

Update app.py

Browse files

Files changed (1) hide show

app.py +3 -48

app.py CHANGED Viewed

@@ -142,33 +142,6 @@ model_x = Qwen2VLForConditionalGeneration.from_pretrained(
     torch_dtype=torch.float16
 ).to(device).eval()
-# Load Aya-Vision-8b
-MODEL_ID_A = "CohereForAI/aya-vision-8b"
-processor_a = AutoProcessor.from_pretrained(MODEL_ID_A, trust_remote_code=True)
-model_a = AutoModelForImageTextToText.from_pretrained(
-    MODEL_ID_A,
-    trust_remote_code=True,
-    torch_dtype=torch.float16
-).to(device).eval()
-# Load olmOCR-7B-0725
-MODEL_ID_W = "allenai/olmOCR-7B-0725"
-processor_w = AutoProcessor.from_pretrained(MODEL_ID_W, trust_remote_code=True)
-model_w = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-    MODEL_ID_W,
-    trust_remote_code=True,
-    torch_dtype=torch.float16
-).to(device).eval()
-# Load RolmOCR
-MODEL_ID_M = "reducto/RolmOCR"
-processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
-model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-    MODEL_ID_M,
-    trust_remote_code=True,
-    torch_dtype=torch.float16
-).to(device).eval()
 @spaces.GPU
 def generate_image(model_name: str, text: str, image: Image.Image,
@@ -178,21 +151,12 @@ def generate_image(model_name: str, text: str, image: Image.Image,
     Generates responses using the selected model for image input.
     Yields raw text and Markdown-formatted text.
     """
-    if model_name == "RolmOCR-7B":
-        processor = processor_m
-        model = model_m
-    elif model_name == "Qwen2-VL-OCR-2B":
         processor = processor_x
         model = model_x
     elif model_name == "Nanonets-OCR2-3B":
         processor = processor_v
         model = model_v
-    elif model_name == "Aya-Vision-8B":
-        processor = processor_a
-        model = model_a
-    elif model_name == "olmOCR-7B-0725":
-        processor = processor_w
-        model = model_w
     else:
         yield "Invalid model selected.", "Invalid model selected."
         return
@@ -237,17 +201,9 @@ def generate_image(model_name: str, text: str, image: Image.Image,
         yield buffer, buffer
-# Define examples for image inference
-image_examples = [
-    ["Extract the full page.", "images/ocr.png"],
-    ["Extract the content.", "images/4.png"],
-    ["Convert this page to doc [table] precisely for markdown.", "images/0.png"]
-]
 # Create the Gradio Interface
 with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
-    gr.Markdown("# **Multimodal OCR**", elem_id="main-title")
     with gr.Row():
         with gr.Column(scale=2):
             image_query = gr.Textbox(label="Query Input", placeholder="Enter your query here...")
@@ -273,8 +229,7 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
                     markdown_output = gr.Markdown(label="(Result.Md)")
                 model_choice = gr.Radio(
-                    choices=["Nanonets-OCR2-3B", "olmOCR-7B-0725", "RolmOCR-7B",
-                         "Aya-Vision-8B", "Qwen2-VL-OCR-2B"],
                     label="Select Model",
                     value="Nanonets-OCR2-3B"
                 )

     torch_dtype=torch.float16
 ).to(device).eval()
 @spaces.GPU
 def generate_image(model_name: str, text: str, image: Image.Image,
     Generates responses using the selected model for image input.
     Yields raw text and Markdown-formatted text.
     """
+    if model_name == "Qwen2-VL-OCR-2B":
         processor = processor_x
         model = model_x
     elif model_name == "Nanonets-OCR2-3B":
         processor = processor_v
         model = model_v
     else:
         yield "Invalid model selected.", "Invalid model selected."
         return
         yield buffer, buffer
 # Create the Gradio Interface
 with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
+    gr.Markdown("# **Angel's Eye - Copilot**", elem_id="main-title")
     with gr.Row():
         with gr.Column(scale=2):
             image_query = gr.Textbox(label="Query Input", placeholder="Enter your query here...")
                     markdown_output = gr.Markdown(label="(Result.Md)")
                 model_choice = gr.Radio(
+                    choices=["Nanonets-OCR2-3B", "Qwen2-VL-OCR-2B"],
                     label="Select Model",
                     value="Nanonets-OCR2-3B"
                 )