Spaces:

rumman19
/

ocrr

Sleeping

rumman19 committed on Dec 19, 2024

Commit

e94bb18

verified ·

1 Parent(s): 2bc16bc

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+import gradio as gr
+import pytesseract
+from PIL import Image
+# Set Tesseract path if needed
+# pytesseract.pytesseract.tesseract_cmd = "/path/to/tesseract"  # Update if needed
def extract_text_from_image(image_path):
    """Run Tesseract OCR on an image and return the recognised text.

    Args:
        image_path: Anything ``PIL.Image.open`` accepts; the Gradio input
            in this file supplies a filesystem path.

    Returns:
        The string produced by ``pytesseract.image_to_string`` with the
        ``eng+hin`` language setting.
    """
    # Use a context manager so the image's file handle is released as soon
    # as OCR finishes instead of lingering until garbage collection.
    with Image.open(image_path) as img:
        return pytesseract.image_to_string(img, lang="eng+hin")
def ocr_and_search(image, keyword):
    """OCR an uploaded image and highlight every occurrence of a keyword.

    Args:
        image: Path of the uploaded image, or ``None`` when nothing was
            uploaded.
        keyword: Text to look for in the OCR output. Matching ignores case.

    Returns:
        A ``(text, highlighted)`` tuple. ``text`` is the raw OCR output.
        ``highlighted`` is an HTML string with each match wrapped in a red,
        bold ``<span>``, or a short message when there is nothing to show.
    """
    import html
    import re

    # Submitting the form without an image passes None; bail out early
    # rather than failing inside the image loader.
    if image is None:
        return "", "No image provided"

    # Extract text from the uploaded image
    text = extract_text_from_image(image)

    # An empty keyword "matches" between every character, which would wrap
    # the whole text in spans; treat it as nothing to search for.
    if not keyword:
        return text, "No keyword provided"

    # Match case-insensitively and keep each hit's original casing. The
    # keyword is escaped so regex metacharacters are taken literally.
    pattern = re.compile(re.escape(keyword), re.IGNORECASE)
    pieces = []
    cursor = 0
    for match in pattern.finditer(text):
        # Escape the raw segments individually: both the OCR output and the
        # keyword are untrusted and end up in an HTML component.
        pieces.append(html.escape(text[cursor:match.start()]))
        pieces.append(
            '<span style="color: red; font-weight: bold;">'
            f"{html.escape(match.group(0))}</span>"
        )
        cursor = match.end()

    if not pieces:
        return text, "Keyword not found"

    pieces.append(html.escape(text[cursor:]))
    return text, "".join(pieces)
# Build the Gradio UI: an image upload plus a keyword box feed
# ocr_and_search, whose two results are shown as plain text and as HTML.
input_components = [
    gr.Image(type="filepath"),
    gr.Textbox(label="Keyword"),
]
output_components = [
    gr.Textbox(label="Extracted Text"),
    gr.HTML(label="Search Results"),
]
app = gr.Interface(
    fn=ocr_and_search,
    inputs=input_components,
    outputs=output_components,
)
# Start the web server; share=True asks Gradio for a public share link.
app.launch(share=True)