# app.py — Streamlit Lab Report Analyzer
# Extracts text from images/PDFs/plain text, then summarizes (English)
# and translates (Hindi, Urdu) using Hugging Face pipelines.
import streamlit as st
from transformers import pipeline, VisionEncoderDecoderModel, ViTImageProcessor
from PIL import Image
import fitz
import logging
from concurrent.futures import ThreadPoolExecutor
import torch
# Configure application-wide logging.
def setup_logging():
    """Initialize the root logger with timestamped INFO-level output."""
    log_format = "%(asctime)s - %(levelname)s - %(message)s"
    logging.basicConfig(level=logging.INFO, format=log_format)
# Load models once and cache them so Streamlit reruns stay fast.
@st.cache_resource
def load_models():
    """Load and cache all Hugging Face models used by the app.

    Returns:
        Tuple of (processor, caption_model, translator_hi, translator_ur,
        summarizer).
    """
    logging.info("Loading Hugging Face models...")
    # FIX: "microsoft/vision-transformation-transformer" does not exist on the
    # Hub and would raise at startup; use the standard ViT+GPT-2
    # image-captioning checkpoint instead.
    processor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
    model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
    # English -> Hindi and English -> Urdu translation pipelines.
    translator_hi = pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")
    translator_ur = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur")
    # English abstractive summarizer.
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    return processor, model, translator_hi, translator_ur, summarizer
# Caption an image with the vision encoder-decoder model.
def extract_text_from_image(image, processor, model):
    """Generate a text caption for ``image`` via the processor/model pair."""
    logging.info("Extracting text from image...")
    # Convert the PIL image into model-ready tensors.
    encoded = processor(images=image, return_tensors="pt")
    generated_ids = model.generate(**encoded)
    # Decode the first (and only) generated sequence, dropping special tokens.
    return processor.decode(generated_ids[0], skip_special_tokens=True)
# Extract all page text from an uploaded PDF.
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in ``pdf_file``.

    Args:
        pdf_file: A file-like object (e.g. Streamlit's ``UploadedFile``)
            containing PDF bytes.

    Returns:
        The text of all pages joined in document order.
    """
    logging.info("Extracting text from PDF...")
    # FIX: fitz.open(path) expects a filesystem path; Streamlit uploads are
    # file-like objects, so pass the raw bytes as a stream instead.
    # The context manager also guarantees the document is closed.
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)
# Run a translation pipeline over long text in fixed-size chunks, concurrently.
def process_chunks(text, model, chunk_size=500):
    """Split ``text`` into chunks, run ``model`` on each concurrently, and
    join the results with spaces.

    Args:
        text: Input text to process; empty text yields "".
        model: A callable invoked as ``model(chunk, max_length=200)`` that
            returns ``[{"translation_text": ...}]`` (transformers translation
            pipeline contract).
        chunk_size: Maximum characters per chunk. NOTE: this is a naive
            character split and may cut words/sentences mid-way.

    Returns:
        The space-joined ``translation_text`` of all chunks.
    """
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    # Threads overlap the waiting on the underlying inference calls.
    with ThreadPoolExecutor() as executor:
        results = executor.map(lambda chunk: model(chunk, max_length=200), chunks)
        # Consume the lazy map inside the pool's context.
        return " ".join(result[0]["translation_text"] for result in results)
# Application entry point: build the UI, route the upload, render results.
def main():
    """Run the Streamlit lab-report analyzer UI."""
    setup_logging()
    st.title("Lab Report Analyzer")
    st.write("Upload a file (Image, PDF, or Text) to analyze and summarize the lab report in English, Hindi, and Urdu.")
    processor, model, translator_hi, translator_ur, summarizer = load_models()
    file = st.file_uploader("Upload a file (Image, PDF, or Text):", type=["jpg", "png", "jpeg", "pdf", "txt"])
    # Guard clause: nothing to do until a file arrives.
    if not file:
        st.info("Please upload a file to begin.")
        return
    try:
        text = ""
        # Dispatch on the browser-reported MIME type.
        if file.type in ("image/jpeg", "image/png", "image/jpg"):
            text = extract_text_from_image(Image.open(file), processor, model)
        elif file.type == "application/pdf":
            text = extract_text_from_pdf(file)
        elif file.type == "text/plain":
            text = file.read().decode("utf-8")
        if not text:
            st.warning("No text could be extracted. Please check the file and try again.")
            return
        with st.spinner("Analyzing the report..."):
            # English summary first, then the two translations.
            summary = summarizer(text, max_length=130, min_length=30)[0]["summary_text"]
            hindi_translation = process_chunks(text, translator_hi)
            urdu_translation = process_chunks(text, translator_ur)
            st.subheader("Analysis Summary (English):")
            st.write(summary)
            st.subheader("Hindi Translation:")
            st.write(hindi_translation)
            st.subheader("Urdu Translation:")
            st.write(urdu_translation)
    except Exception as e:
        logging.error(f"Error processing the file: {e}")
        st.error("An error occurred while processing the file. Please try again.")
# Launch the app when executed as a script (e.g. `streamlit run app.py`).
if __name__ == "__main__":
    main()