Atchyuteswar committed on
Commit
f335964
1 Parent(s): 193f3c1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -0
app.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pytesseract
3
+ import cv2
4
+
5
def extract_text(image):
    """
    Extracts text from an image using PyTesseract.

    Args:
        image: The bill or document to read. Accepted forms are an
            OpenCV/NumPy array, a PIL Image, a filesystem path, or an
            uploaded-file object whose ``name`` attribute holds the path
            of the file on disk.

    Returns:
        str: The extracted text, or a message beginning with "Error:" when
            the image is missing, cannot be decoded, or OCR fails.
    """
    # Reject a missing image up front with a clear message instead of an
    # opaque OpenCV type error.
    if image is None:
        return "Error: no image provided"

    # The result is shown in a text box, so failures are reported as text
    # rather than raised.
    try:
        gray = _to_grayscale(image)
        # Otsu's method picks the binarisation threshold automatically.
        thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
        # --psm 6: treat the page as a single uniform block of text.
        return pytesseract.image_to_string(thresh, config='--psm 6')
    except Exception as e:
        return f"Error: {e}"


def _to_grayscale(image):
    """Return *image* as a single-channel array, loading it from disk if needed."""
    import os

    import numpy as np

    # Uploaded-file wrappers carry their on-disk path in ``name``. pathlib
    # paths also have a ``name`` (only the final component), so genuine
    # paths are recognised first.
    if isinstance(image, (str, os.PathLike)):
        path = os.fspath(image)
    else:
        path = getattr(image, "name", None)
    if isinstance(path, str):
        gray = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if gray is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise ValueError(f"could not read image file: {path}")
        return gray

    is_array = isinstance(image, np.ndarray)
    pixels = image if is_array else np.asarray(image)
    if pixels.ndim == 2:
        # Already single-channel; cvtColor would reject it.
        return pixels
    if pixels.ndim != 3:
        raise ValueError(f"unsupported image data: {type(image).__name__}")

    has_alpha = pixels.shape[2] == 4
    if is_array:
        # Raw arrays keep the original assumption of OpenCV's BGR order.
        code = cv2.COLOR_BGRA2GRAY if has_alpha else cv2.COLOR_BGR2GRAY
    else:
        # Anything converted via np.asarray (e.g. a PIL Image) is RGB-ordered.
        code = cv2.COLOR_RGBA2GRAY if has_alpha else cv2.COLOR_RGB2GRAY
    return cv2.cvtColor(pixels, code)
27
+
28
# Gradio interface for user interaction.
# An image component is used so that extract_text receives decoded pixels
# (a NumPy array) rather than a reference to the uploaded file.
iface = gr.Interface(
    fn=extract_text,
    inputs=gr.Image(type="numpy", label="Upload Bill/Document (Image)"),
    outputs=gr.Textbox(label="Extracted Text"),
    title="Text Extractor for Bills and Documents",
    description="Upload your bill or document to extract the text content.",
)

# Launch the Gradio app
iface.launch()