Spaces:
Running
Running
File size: 4,577 Bytes
7f334ec 8cf2395 7f334ec 4d72778 cc10da2 7f334ec 8cf2395 b8905fc 8cf2395 4d72778 8cf2395 4d72778 8cf2395 4d72778 8cf2395 b8905fc 8cf2395 7f334ec 8cf2395 b8905fc 8cf2395 b8905fc 8cf2395 b8905fc 8cf2395 b8905fc 8cf2395 b8905fc 8cf2395 b8905fc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
import streamlit as st
from transformers import pipeline
import pdfplumber
from PIL import Image
import easyocr
# Initialize Models
@st.cache_resource
def initialize_models():
    """Build every Hugging Face pipeline the app uses.

    The function is wrapped in ``st.cache_resource`` so the pipelines are
    created once and the same objects are handed back on later calls.

    Returns:
        A dict with the keys ``"report_check_model"``, ``"sentiment_model"``
        and ``"summarize_model"`` (one pipeline each) and
        ``"translation_model"``, itself a dict of translation pipelines keyed
        by ``"en"``, ``"hi"`` and ``"ur"``.
    """
    zero_shot = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
    # No model is named here, so the library's default checkpoint is used.
    sentiment = pipeline("sentiment-analysis")
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

    translation_checkpoints = (
        ("en", "Helsinki-NLP/opus-mt-mul-en"),
        ("hi", "Helsinki-NLP/opus-mt-en-hi"),
        ("ur", "Helsinki-NLP/opus-mt-en-ur"),
    )
    translators = {
        language: pipeline("translation", model=checkpoint)
        for language, checkpoint in translation_checkpoints
    }

    return {
        "report_check_model": zero_shot,
        "sentiment_model": sentiment,
        "summarize_model": summarizer,
        "translation_model": translators,
    }
# Extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF as one string.

    Args:
        pdf_file: Anything ``pdfplumber.open`` accepts (the app passes the
            uploaded file object).

    Returns:
        The page texts joined by newlines, with leading and trailing
        whitespace removed. Empty when no page yields any text.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # A page with no extractable text (e.g. a scanned image) may give
        # None or ""; skip those instead of failing on `str + None`.
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    # Newline separator keeps the last word of one page from fusing with the
    # first word of the next.
    return "\n".join(chunk for chunk in page_texts if chunk).strip()
# Extract text from Image using EasyOCR
@st.cache_resource
def _get_ocr_reader():
    """Create the English EasyOCR reader once and reuse it on later calls."""
    return easyocr.Reader(['en'])  # Add more languages if needed


def extract_text_from_image(image_file):
    """Run OCR on an image and return the recognized text as one string.

    Args:
        image_file: A binary file-like object (such as the uploaded file) or
            a filesystem path to the image.

    Returns:
        The recognized text fragments joined by single spaces, stripped.
    """
    reader = _get_ocr_reader()
    if hasattr(image_file, "read"):
        # readtext takes raw encoded bytes; a PIL Image object is not among
        # its documented input types, so hand over the file content instead.
        image = bytes(image_file.read())
    else:
        image = str(image_file)
    fragments = reader.readtext(image, detail=0)  # `detail=0` returns only the text
    return " ".join(fragments).strip()
# Preprocess text for model input
def preprocess_text(text, max_length=1024):
    """Cut ``text`` down to at most ``max_length`` characters.

    Args:
        text: The text to shorten.
        max_length: Maximum number of characters to keep.

    Returns:
        ``text`` unchanged when it already fits, otherwise its first
        ``max_length`` characters.
    """
    # Slicing past the end is a no-op, so no explicit length check is needed.
    return text[:max_length]
# Check if content is a lab report
def is_lab_report(text, model):
    """Tell whether ``model`` ranks "lab report" as the top label for ``text``.

    Args:
        text: The text to classify.
        model: A callable invoked as ``model(text, candidate_labels=[...])``
            that returns a mapping whose ``"labels"`` entry is a sequence;
            the first element is treated as the winning label.

    Returns:
        True when the first returned label is ``"lab report"``.
    """
    candidates = ["lab report", "not lab report"]
    classification = model(text, candidate_labels=candidates)
    top_label = classification["labels"][0]
    return top_label == "lab report"
# Analyze sentiment
def analyze_sentiment(text, sentiment_model):
    """Classify ``text`` as positive or negative.

    Args:
        text: The text to analyze.
        sentiment_model: A callable invoked as ``sentiment_model(text)`` that
            returns a sequence whose first item has ``"label"`` and
            ``"score"`` entries.

    Returns:
        A ``(sentiment, score)`` tuple. ``sentiment`` is ``"Positive"`` only
        when the label is exactly ``"POSITIVE"``; every other label maps to
        ``"Negative"``.
    """
    prediction = sentiment_model(text)[0]
    if prediction["label"] == "POSITIVE":
        verdict = "Positive"
    else:
        verdict = "Negative"
    return verdict, prediction["score"]
# Summarize content
def summarize_content(text, summarize_model):
    """Summarize ``text`` with the given summarization callable.

    Args:
        text: The text to summarize.
        summarize_model: A callable invoked with ``text`` plus the keyword
            arguments ``max_length=130``, ``min_length=30`` and
            ``do_sample=False``; it returns a sequence whose first item has a
            ``"summary_text"`` entry.

    Returns:
        The ``"summary_text"`` of the first result.
    """
    generation_options = {"max_length": 130, "min_length": 30, "do_sample": False}
    first_result = summarize_model(text, **generation_options)[0]
    return first_result['summary_text']
# Translate content
def translate_content(text, translation_models):
    """Return ``text`` alongside its Hindi and Urdu translations.

    Args:
        text: The text to translate; it is returned unchanged under
            ``"English"``.
        translation_models: A mapping with ``"hi"`` and ``"ur"`` entries,
            each a callable invoked as ``model(text)`` that returns a
            sequence whose first item has a ``"translation_text"`` entry.

    Returns:
        A dict with the keys ``"English"``, ``"Hindi"`` and ``"Urdu"``.
    """
    translations = {"English": text}
    for language_name, model_key in (("Hindi", "hi"), ("Urdu", "ur")):
        translator = translation_models[model_key]
        translations[language_name] = translator(text)[0]["translation_text"]
    return translations
# Streamlit App
def main():
    """Render the "Lab Test Analyzer" Streamlit page.

    Flow: load the models, let the user upload a PDF, image or text file,
    extract its text, check that the text looks like a lab report, then show
    the sentiment, a summary, and Hindi/Urdu translations of that summary.
    Each failure path reports through ``st.error`` and ends the run.
    """
    st.title("Lab Test Analyzer")
    models = initialize_models()
    uploaded_file = st.file_uploader(
        "Upload a Lab Report (PDF, Image, or Text)",
        type=["pdf", "png", "jpg", "jpeg", "txt"],
    )
    if not uploaded_file:
        return

    # The extension of the uploaded file name selects the extraction path.
    file_type = uploaded_file.name.split(".")[-1].lower()
    if file_type == "pdf":
        st.write("Processing PDF file...")
        extracted_text = extract_text_from_pdf(uploaded_file)
    elif file_type in ("png", "jpg", "jpeg"):
        st.write("Processing Image file...")
        extracted_text = extract_text_from_image(uploaded_file)
    elif file_type == "txt":
        st.write("Processing Text file...")
        # errors="replace" keeps a non-UTF-8 upload from crashing the app with
        # UnicodeDecodeError; strip() makes whitespace-only files count as
        # empty, matching what the PDF and image extractors return.
        extracted_text = uploaded_file.read().decode("utf-8", errors="replace").strip()
    else:
        st.error("Unsupported file type.")
        return

    if not extracted_text:
        st.error("Could not extract text from the uploaded file.")
        return

    st.subheader("Extracted Content")
    st.text_area("Extracted Text", extracted_text, height=200)

    # Preprocess text
    preprocessed_text = preprocess_text(extracted_text)

    # Check if it's a lab report
    if not is_lab_report(preprocessed_text, models["report_check_model"]):
        st.error("The uploaded file does not appear to be a lab report.")
        return
    st.success("The uploaded file is a valid lab report.")

    # Sentiment Analysis
    sentiment, confidence = analyze_sentiment(preprocessed_text, models["sentiment_model"])
    st.subheader("Sentiment Analysis")
    st.write(f"**Sentiment**: {sentiment} (Confidence: {confidence:.2f})")

    # Summarization
    summary = summarize_content(preprocessed_text, models["summarize_model"])
    st.subheader("Summary")
    st.text_area("Summary", summary, height=150)

    # Translation (of the summary, not of the full extracted text)
    translations = translate_content(summary, models["translation_model"])
    st.subheader("Translations")
    st.write("**English**: ", translations["English"])
    st.write("**Hindi**: ", translations["Hindi"])
    st.write("**Urdu**: ", translations["Urdu"])
# Script entry point: start the app only when this file is run directly.
if __name__ == "__main__":
    main()