badru committed on
Commit
4813fb1
·
verified ·
1 Parent(s): a4ad892

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -55
app.py CHANGED
@@ -1,8 +1,7 @@
1
  import streamlit as st
2
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
3
  from PIL import Image
4
- import cv2
5
- import numpy as np
6
 
7
  # Load the model and processor
8
  @st.cache_resource
@@ -13,68 +12,53 @@ def load_model():
13
 
14
  processor, model = load_model()
15
 
16
- # Helper function to preprocess the image and detect lines
17
- def detect_lines(image, min_height=20, min_width=100):
18
- # Convert the PIL image to a NumPy array
19
- image_np = np.array(image)
 
20
 
21
- # Convert to grayscale
22
- gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
23
 
24
- # Apply binary thresholding
25
- _, binary = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
 
 
 
 
26
 
27
- # Dilate to merge nearby text
28
- kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
29
- dilated = cv2.dilate(binary, kernel, iterations=1)
30
-
31
- # Find contours
32
- contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
33
-
34
- # Sort contours top-to-bottom
35
- bounding_boxes = [cv2.boundingRect(c) for c in contours]
36
- bounding_boxes = sorted(bounding_boxes, key=lambda b: b[1]) # Sort by y-coordinate
37
-
38
- # Filter out small contours and merge nearby ones
39
- filtered_boxes = []
40
- for x, y, w, h in bounding_boxes:
41
- if h >= min_height and w >= min_width: # Filter small boxes
42
- filtered_boxes.append((x, y, w, h))
43
-
44
- # Extract individual lines as images
45
- line_images = []
46
- for (x, y, w, h) in filtered_boxes:
47
- line = image_np[y:y+h, x:x+w]
48
- line_images.append(line)
49
 
50
- return line_images
 
51
 
52
- # Streamlit app
53
- st.title("OCR API Service with Multiline Support")
 
 
 
54
 
55
- # Handle image upload
56
- uploaded_file = st.file_uploader("Upload an Image", type=["jpg", "jpeg", "png"])
57
 
58
- if uploaded_file is not None:
59
- try:
60
- # Load and process the uploaded image
61
- image = Image.open(uploaded_file).convert("RGB")
62
- line_images = detect_lines(image, min_height=30, min_width=100)
63
 
64
- # Perform OCR on each detected line
65
- extracted_text = ""
66
- for line_img in line_images:
67
- line_pil = Image.fromarray(line_img)
68
- pixel_values = processor(images=line_pil, return_tensors="pt").pixel_values
69
  generated_ids = model.generate(pixel_values)
70
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
71
- extracted_text += f"{generated_text}\n"
72
 
73
- # Simulate API-like JSON response
74
- json_response = {"extracted_text": extracted_text}
 
75
 
76
- # Return JSON response
77
- st.write(json_response) # This is the response to your CodeIgniter client
78
- except Exception as e:
79
- # Return an error response
80
- st.write({"error": str(e)})
 
1
  import streamlit as st
2
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
3
  from PIL import Image
4
+ import json
 
5
 
6
  # Load the model and processor
7
  @st.cache_resource
 
12
 
13
  processor, model = load_model()
14
 
15
+ # Check if the request is an API call
16
+ if st.runtime.scriptrunner.script_run_context.is_running_with_auth:
17
+ import io
18
+ from fastapi import FastAPI, File, UploadFile
19
+ from fastapi.responses import JSONResponse
20
 
21
+ app = FastAPI()
 
22
 
23
+ @app.post("/process_image")
24
+ async def process_image(image: UploadFile = File(...)):
25
+ try:
26
+ # Read the uploaded image
27
+ image_data = await image.read()
28
+ image = Image.open(io.BytesIO(image_data)).convert("RGB")
29
 
30
+ # Perform OCR
31
+ pixel_values = processor(images=image, return_tensors="pt").pixel_values
32
+ generated_ids = model.generate(pixel_values)
33
+ generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
+ # Return extracted text as JSON
36
+ return JSONResponse(content={"extracted_text": generated_text})
37
 
38
+ except Exception as e:
39
+ return JSONResponse(content={"error": str(e)}, status_code=500)
40
+ else:
41
+ # Streamlit UI for manual testing
42
+ st.title("OCR API Service")
43
 
44
+ uploaded_file = st.file_uploader("Upload an Image", type=["jpg", "jpeg", "png"])
 
45
 
46
+ if uploaded_file is not None:
47
+ try:
48
+ # Load and display the uploaded image
49
+ image = Image.open(uploaded_file).convert("RGB")
50
+ st.image(image, caption="Uploaded Image", use_column_width=True)
51
 
52
+ # Perform OCR
53
+ pixel_values = processor(images=image, return_tensors="pt").pixel_values
 
 
 
54
  generated_ids = model.generate(pixel_values)
55
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
 
56
 
57
+ # Display extracted text
58
+ st.subheader("Extracted Text:")
59
+ st.text(generated_text)
60
 
61
+ except Exception as e:
62
+ st.error(f"An error occurred: {e}")
63
+ else:
64
+ st.info("Please upload an image to start the OCR process.")