File size: 1,698 Bytes
3b55d0a
 
8785ffa
3b55d0a
8785ffa
 
3b55d0a
 
 
8785ffa
 
 
 
 
 
3b55d0a
 
 
081377f
 
3b55d0a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
081377f
3b55d0a
 
 
 
 
 
 
 
8785ffa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import streamlit as st
from PIL import Image
import easyocr

# Initialize the EasyOCR reader
@st.cache_resource
def _load_reader():
    """Build the shared EasyOCR reader for English and Hindi.

    Streamlit re-executes this script on every widget interaction, so the
    reader is created through ``st.cache_resource``: the models are loaded on
    the first run and the same instance is handed back on later reruns.

    Returns:
        The ``easyocr.Reader`` configured for 'en' and 'hi', running on CPU.
    """
    # 'en' for English, 'hi' for Hindi; gpu=False keeps inference on the CPU.
    return easyocr.Reader(['en', 'hi'], gpu=False)


# Module-level handle used by the OCR helper in this file.
reader = _load_reader()

# Function to process image and perform OCR
def process_image(image):
    """Run OCR on an image and return the recognized words, one per line.

    Args:
        image: Anything ``PIL.Image.open`` accepts; the app passes the
            Streamlit upload (a file-like object).

    Returns:
        str: Every whitespace-separated token from the recognized text
        blocks, joined with newlines. Empty when no text is recognized.
    """
    # Local import so this function brings its own dependency into scope.
    import numpy as np

    # Decode to a plain 3-channel image so PNGs with alpha or palette modes
    # are handled the same way as JPEGs.
    with Image.open(image) as img:
        rgb = np.asarray(img.convert("RGB"))

    # EasyOCR documents file path/URL, bytes and numpy array as inputs; pass
    # an OpenCV-style BGR array instead of the PIL object, which is not a
    # documented input type.
    bgr = np.ascontiguousarray(rgb[:, :, ::-1])

    # detail=0 yields plain text strings (no boxes or confidences).
    result = reader.readtext(bgr, detail=0, paragraph=False)

    # Split each recognized block into words and put one word per line.
    words = [word for block in result for word in block.split()]
    return "\n".join(words)

# Function to highlight keywords in extracted text
def highlight_keywords(text, keyword):
    """Return ``text`` with every occurrence of ``keyword`` wrapped in ``**``.

    The match is an exact, case-sensitive substring match. The ``**`` markers
    are Markdown bold syntax, so the result is meant to be rendered as
    Markdown.

    Args:
        text: The text to search.
        keyword: The substring to highlight.

    Returns:
        str: ``text`` with each occurrence of ``keyword`` replaced by
        ``**keyword**``; ``text`` unchanged when ``keyword`` is empty or
        does not occur.
    """
    if not keyword:
        # str.replace with an empty pattern would insert "****" between
        # every character of the text.
        return text
    return text.replace(keyword, f"**{keyword}**")

# Streamlit app UI
st.title("OCR Web App for Hindi & English Text")
st.write("Upload an image with Hindi and English text, extract the text, and search for keywords.")

# File uploader for images
uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Display the uploaded image
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # Streamlit reruns this script on every widget interaction (including
    # each keyword search), so keep the OCR result in session state and only
    # recompute it when a different file is uploaded.
    upload_key = (uploaded_file.name, hash(uploaded_file.getvalue()))
    cached = st.session_state.get("ocr_result")
    if cached is None or cached[0] != upload_key:
        # A spinner goes away once extraction finishes, so no stale
        # "Extracting text..." message is left on the page.
        with st.spinner("Extracting text..."):
            cached = (upload_key, process_image(uploaded_file))
        st.session_state["ocr_result"] = cached
    extracted_text = cached[1]

    # Display the extracted text
    st.subheader("Extracted Text:")
    st.text(extracted_text)

    # Search functionality
    query = st.text_input("Enter a keyword to search in the extracted text:")

    if query:
        # Highlight the search keyword
        st.subheader("Search Results:")
        result = highlight_keywords(extracted_text, query)
        st.write(result)