File size: 4,577 Bytes
7f334ec
8cf2395
 
7f334ec
4d72778
cc10da2
7f334ec
8cf2395
 
 
b8905fc
8cf2395
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4d72778
8cf2395
4d72778
8cf2395
4d72778
 
8cf2395
b8905fc
 
 
 
8cf2395
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7f334ec
 
8cf2395
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b8905fc
8cf2395
 
 
 
 
b8905fc
 
 
8cf2395
b8905fc
8cf2395
 
 
 
 
 
b8905fc
8cf2395
 
 
 
b8905fc
8cf2395
 
 
 
 
 
 
 
 
 
 
 
 
 
b8905fc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import streamlit as st
from transformers import pipeline
import pdfplumber
from PIL import Image
import easyocr

# Initialize Models
@st.cache_resource
def initialize_models():
    """Build every Hugging Face pipeline the app needs and return them in a dict.

    The function is wrapped in ``st.cache_resource`` so that Streamlit can
    hand back the already-built pipelines instead of constructing them again.

    Returns:
        dict: with the keys
            * ``"report_check_model"`` - zero-shot classification pipeline,
            * ``"sentiment_model"`` - sentiment-analysis pipeline (library
              default checkpoint, none is named here),
            * ``"summarize_model"`` - summarization pipeline,
            * ``"translation_model"`` - dict of translation pipelines keyed
              by ``"en"``, ``"hi"`` and ``"ur"``.
    """
    # Checkpoint used for each translation pipeline, keyed the same way as
    # the returned "translation_model" mapping.
    translation_checkpoints = {
        "en": "Helsinki-NLP/opus-mt-mul-en",
        "hi": "Helsinki-NLP/opus-mt-en-hi",
        "ur": "Helsinki-NLP/opus-mt-en-ur",
    }

    report_checker = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
    sentiment = pipeline("sentiment-analysis")
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    translators = {
        code: pipeline("translation", model=checkpoint)
        for code, checkpoint in translation_checkpoints.items()
    }

    return {
        "report_check_model": report_checker,
        "sentiment_model": sentiment,
        "summarize_model": summarizer,
        "translation_model": translators,
    }

# Extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, one page per line group.

    Args:
        pdf_file: Passed straight to ``pdfplumber.open`` (the app hands in
            the uploaded file object).

    Returns:
        str: The text of all pages that produced any, joined with newlines
        and stripped of leading/trailing whitespace. An empty string when no
        page yields text (e.g. a scanned PDF without a text layer).
    """
    with pdfplumber.open(pdf_file) as pdf:
        # extract_text() can give back None (or "") for a page with no text
        # layer; such pages are skipped instead of breaking concatenation.
        page_texts = [page.extract_text() for page in pdf.pages]

    # Join with a newline so the last word of one page does not fuse with
    # the first word of the next.
    return "\n".join(chunk for chunk in page_texts if chunk).strip()

# Extract text from Image using EasyOCR
def extract_text_from_image(image_file):
    """Run English OCR on an image and return the recognised text.

    Args:
        image_file: Anything ``PIL.Image.open`` accepts (the app hands in the
            uploaded file object).

    Returns:
        str: The recognised text fragments joined by single spaces and
        stripped; an empty string when nothing is recognised.
    """
    import numpy as np  # local import: numpy is not imported at file level

    # Building a Reader loads the OCR models, so let Streamlit keep a single
    # instance instead of creating a new one for every uploaded image.
    reader = st.cache_resource(_build_ocr_reader)()

    with Image.open(image_file) as image:
        # Normalise palette / alpha / greyscale inputs to three channels.
        rgb_pixels = np.asarray(image.convert("RGB"))

    # readtext() documents file paths, bytes and numpy arrays as inputs, so
    # hand it an array rather than the PIL object (PNG images were rejected).
    # NOTE(review): EasyOCR appears to treat 3-channel arrays as BGR (OpenCV
    # convention), hence the channel flip - confirm against the installed version.
    bgr_pixels = np.ascontiguousarray(rgb_pixels[:, :, ::-1])

    result = reader.readtext(bgr_pixels, detail=0)  # `detail=0` returns only the text
    return " ".join(result).strip()


def _build_ocr_reader():
    """Create the EasyOCR reader used by ``extract_text_from_image``."""
    return easyocr.Reader(['en'])  # Add more languages if needed

# Preprocess text for model input
def preprocess_text(text, max_length=1024):
    """Cap *text* at ``max_length`` characters before it is sent to a model.

    Args:
        text: The text to cap.
        max_length: Maximum number of characters to keep (default 1024).

    Returns:
        *text* unchanged when it already fits, otherwise its first
        ``max_length`` characters. The cut is character-based, so it may fall
        in the middle of a word.
    """
    if len(text) <= max_length:
        return text
    return text[:max_length]

# Check if content is a lab report
def is_lab_report(text, model):
    """Tell whether *text* is classified as a lab report.

    Args:
        text: The text to classify.
        model: Callable invoked as ``model(text, candidate_labels=[...])``
            whose result exposes a ``"labels"`` sequence (the app passes its
            zero-shot classification pipeline).

    Returns:
        bool: True when the first entry of the returned ``"labels"`` is
        ``"lab report"`` - presumably the highest-scoring label; verify
        against the pipeline's output ordering.
    """
    positive_label = "lab report"
    candidates = [positive_label, "not lab report"]
    ranked_labels = model(text, candidate_labels=candidates)["labels"]
    return ranked_labels[0] == positive_label

# Analyze sentiment
def analyze_sentiment(text, sentiment_model):
    """Classify the sentiment of *text* as Positive or Negative.

    Args:
        text: The text to analyse.
        sentiment_model: Callable returning a sequence whose first item is a
            mapping with ``"label"`` and ``"score"`` keys.

    Returns:
        tuple: ``(sentiment, score)`` where ``sentiment`` is ``"Positive"``
        only when the label is exactly ``"POSITIVE"`` and ``"Negative"`` for
        any other label; ``score`` is passed through unchanged.
    """
    top_prediction = sentiment_model(text)[0]
    if top_prediction["label"] == "POSITIVE":
        sentiment = "Positive"
    else:
        sentiment = "Negative"
    return sentiment, top_prediction["score"]

# Summarize content
def summarize_content(text, summarize_model):
    """Summarise *text* and return the summary string.

    Args:
        text: The text to summarise.
        summarize_model: Callable invoked with *text* plus the generation
            settings below; its first result must carry ``"summary_text"``.

    Returns:
        The ``"summary_text"`` of the first result.
    """
    # Fixed generation settings: length bounds of 30-130 and no sampling.
    generation_options = {"max_length": 130, "min_length": 30, "do_sample": False}
    outputs = summarize_model(text, **generation_options)
    first_output = outputs[0]
    return first_output['summary_text']

# Translate content
def translate_content(text, translation_models):
    """Return *text* alongside its Hindi and Urdu translations.

    Args:
        text: The text to translate; it is returned as-is under "English".
        translation_models: Mapping with ``"hi"`` and ``"ur"`` entries, each a
            callable whose first result carries ``"translation_text"``.

    Returns:
        dict: with keys ``"English"``, ``"Hindi"`` and ``"Urdu"``, in that
        order.
    """
    translations = {"English": text}
    # (display name, key into translation_models), in output order.
    for language, model_key in (("Hindi", "hi"), ("Urdu", "ur")):
        outputs = translation_models[model_key](text)
        translations[language] = outputs[0]["translation_text"]
    return translations

# Streamlit App
def main():
    """Render the Lab Test Analyzer page and process one uploaded file.

    Flow: load the models, let the user upload a PDF / image / text file,
    extract its text, check that it is classified as a lab report, then show
    sentiment, a summary, and the summary's Hindi and Urdu translations.
    Each failure path shows an ``st.error`` message and stops.
    """
    st.title("Lab Test Analyzer")

    models = initialize_models()

    uploaded_file = st.file_uploader(
        "Upload a Lab Report (PDF, Image, or Text)",
        type=["pdf", "png", "jpg", "jpeg", "txt"],
    )

    # Nothing uploaded yet: only the title and the uploader are shown.
    if not uploaded_file:
        return

    # The extension of the uploaded file's name decides which extractor runs.
    file_type = uploaded_file.name.split(".")[-1].lower()

    if file_type == "pdf":
        st.write("Processing PDF file...")
        extracted_text = extract_text_from_pdf(uploaded_file)
    elif file_type in ["png", "jpg", "jpeg"]:
        st.write("Processing Image file...")
        extracted_text = extract_text_from_image(uploaded_file)
    elif file_type == "txt":
        st.write("Processing Text file...")
        # "utf-8-sig" drops a leading BOM if present; errors="replace" keeps a
        # file in another encoding from crashing the app with
        # UnicodeDecodeError. Stripping matches the PDF/image extractors, so a
        # whitespace-only file is reported as having no extractable text.
        raw_bytes = uploaded_file.read()
        extracted_text = raw_bytes.decode("utf-8-sig", errors="replace").strip()
    else:
        st.error("Unsupported file type.")
        return

    if not extracted_text:
        st.error("Could not extract text from the uploaded file.")
        return

    st.subheader("Extracted Content")
    st.text_area("Extracted Text", extracted_text, height=200)

    # Preprocess text (cap its length before it is passed to the models)
    preprocessed_text = preprocess_text(extracted_text)

    # Check if it's a lab report
    if not is_lab_report(preprocessed_text, models["report_check_model"]):
        st.error("The uploaded file does not appear to be a lab report.")
        return

    st.success("The uploaded file is a valid lab report.")

    # Sentiment Analysis
    sentiment, confidence = analyze_sentiment(preprocessed_text, models["sentiment_model"])
    st.subheader("Sentiment Analysis")
    st.write(f"**Sentiment**: {sentiment} (Confidence: {confidence:.2f})")

    # Summarization
    summary = summarize_content(preprocessed_text, models["summarize_model"])
    st.subheader("Summary")
    st.text_area("Summary", summary, height=150)

    # Translation (of the summary, not of the full extracted text)
    translations = translate_content(summary, models["translation_model"])
    st.subheader("Translations")
    st.write("**English**: ", translations["English"])
    st.write("**Hindi**: ", translations["Hindi"])
    st.write("**Urdu**: ", translations["Urdu"])

# Run the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()