# rohantheru's picture
# Update app.py
# 152dc71 verified
import streamlit as st
import PyPDF2
from transformers import pipeline
import pytesseract
from PIL import Image, ImageEnhance, ImageFilter
# Load the pre-trained summarization pipeline (model and tokenizer).
checkpoint = "facebook/bart-large-cnn"


@st.cache_resource
def _load_summarizer(model_name):
    """Build the summarization pipeline for *model_name* once per process.

    Streamlit re-executes this script from the top on every widget
    interaction; caching the pipeline as a resource avoids constructing
    the model again on each of those reruns.
    """
    return pipeline('summarization', model=model_name)


model = _load_summarizer(checkpoint)

# Streamlit UI
st.title("Text Summarizer using LLM")
# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts; this script passes
            the object returned by ``st.file_uploader``.

    Returns:
        The text of all pages separated by newlines, or an empty string
        when the document has no pages or no extractable text.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    # Accumulate the text of *all* pages instead of rebinding one variable
    # per page (which keeps a single page only and leaves the name unbound
    # for a zero-page document). ``or ""`` guards against a falsy result
    # from ``extract_text()`` on pages without a text layer.
    page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
    return "\n".join(page_texts)
def summarize_text(text, min_length=256, max_length=512):
    """Summarize *text* with the module-level summarization pipeline.

    Args:
        text: The document to summarize. ``None``, empty, or
            whitespace-only input yields an empty summary without
            invoking the model.
        min_length: Lower bound passed to the pipeline for the generated
            summary length (defaults to the previously hard-coded 256).
        max_length: Upper bound passed to the pipeline for the generated
            summary length (defaults to the previously hard-coded 512).

    Returns:
        The ``summary_text`` field of the first pipeline result, or ``""``
        when there is nothing to summarize.
    """
    # Nothing to summarize: skip the (expensive) model call entirely.
    if not text or not text.strip():
        return ""
    results = model(
        text,
        min_length=min_length,
        max_length=max_length,
        do_sample=True,
        # Ask the pipeline to cut inputs that exceed the model's maximum
        # input length instead of failing on long documents.
        truncation=True,
    )
    return results[0]['summary_text']
# Function to perform OCR on uploaded image
def perform_ocr(image):
    """Run Tesseract OCR on *image* and return the recognized text.

    The image is handed to ``pytesseract.image_to_string`` as-is (no
    enhancement or filtering is applied here), using the English language
    data and page-segmentation mode 3.
    """
    ocr_options = {"lang": 'eng', "config": '--psm 3'}
    return pytesseract.image_to_string(image, **ocr_options)
@st.cache_resource
def _load_question_answerer():
    """Build the question-answering pipeline once and reuse it on reruns."""
    return pipeline("question-answering", model='distilbert-base-cased-distilled-squad')


def answering(tex):
    """Render a question box and show the answer extracted from *tex*.

    Args:
        tex: The context text the question is answered from.

    Side effects:
        Draws a text input and an "Answer Question" button; when the button
        is pressed with a non-empty question and context, writes the
        pipeline's ``answer`` field under an "Answer:" subheader. Otherwise
        a warning is shown instead of calling the model.
    """
    question = st.text_input("Enter your question:")
    if not st.button("Answer Question"):
        return

    # Validate before calling the model: a blank question or context
    # cannot produce a meaningful answer (NOTE(review): the pipeline is
    # expected to reject empty inputs with an exception — confirm).
    if not question.strip() or not tex or not tex.strip():
        st.warning("Please enter a question and provide some text to search.")
        return

    # Load lazily so the model is only built when an answer is requested,
    # and cached so later reruns reuse the same pipeline object.
    question_answerer = _load_question_answerer()
    result = question_answerer(question=question, context=tex)

    st.subheader("Answer:")
    st.write(result["answer"])
def _render_text_tools(text, heading):
    """Show *text* under *heading*, then offer summarization and Q&A on it."""
    st.subheader(heading)
    st.write(text)
    if st.button("Summarize"):
        # Generate and display the summary
        st.subheader("Summary:")
        st.write(summarize_text(text))
    # Rendered outside the "Summarize" branch on purpose: a Streamlit
    # button is True only during the rerun caused by its own click, so
    # widgets nested under it vanish as soon as another widget is used.
    answering(text)


# Drop-down for selecting the input format
input_format = st.selectbox("Select input format:", ('Text', 'PDF', 'Image'))

if input_format == 'Text':
    # Plain-text upload
    uploaded_file = st.file_uploader("Upload a text document (.txt)", type="txt")
    if uploaded_file is not None:
        # Replace undecodable bytes rather than aborting the whole page on
        # a file that is not valid UTF-8.
        text = uploaded_file.read().decode("utf-8", errors="replace")
        _render_text_tools(text, "Original Text:")
elif input_format == 'PDF':
    # PDF upload: text is extracted from the document's pages
    uploaded_file = st.file_uploader("Upload a PDF document", type="pdf")
    if uploaded_file is not None:
        text = extract_text_from_pdf(uploaded_file)
        _render_text_tools(text, "Original Text:")
elif input_format == 'Image':
    # Image upload: text is obtained through OCR
    uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    if uploaded_image is not None:
        image = Image.open(uploaded_image)
        text = perform_ocr(image)
        _render_text_tools(text, "Extracted Text from Image:")