Update app.py
Browse files
app.py
CHANGED
@@ -11,12 +11,13 @@ from huggingface_hub import snapshot_download
|
|
11 |
import easyocr
|
12 |
import re
|
13 |
from typing import Tuple
|
|
|
14 |
|
15 |
# Disable CUDA (Optional: as per your original code)
|
16 |
torch.cuda.is_available = lambda: False
|
17 |
|
18 |
# Set Streamlit page configuration
|
19 |
-
st.set_page_config(page_title="
|
20 |
|
21 |
@st.cache_resource
|
22 |
def setup_got_model() -> Tuple[object, object]:
|
@@ -50,7 +51,7 @@ def perform_got_ocr(model, processor, image: Image.Image) -> str:
|
|
50 |
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
51 |
return generated_text
|
52 |
|
53 |
-
def perform_easyocr(ocr_reader, image:
|
54 |
results = ocr_reader.readtext(image, detail=0, paragraph=True)
|
55 |
extracted_text = '\n'.join(results)
|
56 |
return extracted_text
|
@@ -74,7 +75,7 @@ def main():
|
|
74 |
ocr_reader = setup_easyocr()
|
75 |
ocr_model = 'EasyOCR'
|
76 |
|
77 |
-
st.title("
|
78 |
|
79 |
# Sidebar for instructions
|
80 |
st.sidebar.header("Instructions")
|
@@ -90,15 +91,23 @@ def main():
|
|
90 |
|
91 |
if uploaded_file is not None:
|
92 |
try:
|
93 |
-
|
94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
|
96 |
# Perform OCR
|
97 |
with st.spinner(f"Performing OCR using {ocr_model}..."):
|
98 |
if ocr_model == 'GOT-OCR2_0' and model and processor:
|
99 |
-
extracted_text = perform_got_ocr(model, processor,
|
100 |
else:
|
101 |
-
extracted_text = perform_easyocr(ocr_reader,
|
102 |
|
103 |
st.subheader("Extracted Text:")
|
104 |
st.text_area("Text", extracted_text, height=200)
|
@@ -118,4 +127,4 @@ def main():
|
|
118 |
st.info("Please upload an image file to get started.")
|
119 |
|
120 |
if __name__ == "__main__":
|
121 |
-
main()
|
|
|
11 |
import easyocr
|
12 |
import re
|
13 |
from typing import Tuple
|
14 |
+
import numpy as np
|
15 |
|
16 |
# Disable CUDA (Optional: as per your original code)
|
17 |
torch.cuda.is_available = lambda: False
|
18 |
|
19 |
# Set Streamlit page configuration
|
20 |
+
st.set_page_config(page_title="Bilingual OCR App", layout="wide")
|
21 |
|
22 |
@st.cache_resource
|
23 |
def setup_got_model() -> Tuple[object, object]:
|
|
|
51 |
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
52 |
return generated_text
|
53 |
|
54 |
+
def perform_easyocr(ocr_reader, image: np.ndarray) -> str:
    """Run EasyOCR on an image and return the recognised text.

    Args:
        ocr_reader: Object exposing ``readtext(image, detail=..., paragraph=...)``.
            Presumably the ``easyocr.Reader`` returned by ``setup_easyocr()``
            -- TODO confirm against that helper.
        image: Image as a NumPy array (the caller builds it with
            ``np.array`` from an RGB PIL image).

    Returns:
        The strings returned by ``readtext`` joined with newlines; an empty
        string when nothing was recognised.
    """
    # detail=0 requests plain text (no boxes/confidences) and paragraph=True
    # lets EasyOCR merge neighbouring lines, so `results` is a list of str.
    results = ocr_reader.readtext(image, detail=0, paragraph=True)
    extracted_text = '\n'.join(results)
    return extracted_text
|
|
|
75 |
ocr_reader = setup_easyocr()
|
76 |
ocr_model = 'EasyOCR'
|
77 |
|
78 |
+
st.title("Bilingual OCR Application")
|
79 |
|
80 |
# Sidebar for instructions
|
81 |
st.sidebar.header("Instructions")
|
|
|
91 |
|
92 |
if uploaded_file is not None:
|
93 |
try:
|
94 |
+
# Read the file into bytes
|
95 |
+
image_bytes = uploaded_file.read()
|
96 |
+
|
97 |
+
# Open the image with PIL
|
98 |
+
image_pil = Image.open(BytesIO(image_bytes)).convert('RGB')
|
99 |
+
|
100 |
+
# Convert to numpy array for EasyOCR
|
101 |
+
image_np = np.array(image_pil)
|
102 |
+
|
103 |
+
st.image(image_pil, caption="Uploaded Image", use_column_width=True)
|
104 |
|
105 |
# Perform OCR
|
106 |
with st.spinner(f"Performing OCR using {ocr_model}..."):
|
107 |
if ocr_model == 'GOT-OCR2_0' and model and processor:
|
108 |
+
extracted_text = perform_got_ocr(model, processor, image_pil)
|
109 |
else:
|
110 |
+
extracted_text = perform_easyocr(ocr_reader, image_np)
|
111 |
|
112 |
st.subheader("Extracted Text:")
|
113 |
st.text_area("Text", extracted_text, height=200)
|
|
|
127 |
st.info("Please upload an image file to get started.")
|
128 |
|
129 |
# Start the Streamlit app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|