Spaces:

rohantheru
/

Bart_Text_Summarizer

Sleeping

App Files Files Community

Bart_Text_Summarizer / app.py

rohantheru

Update app.py

152dc71 verified 8 months ago

raw

history blame contribute delete

3.08 kB

	import streamlit as st
	import PyPDF2
	from transformers import pipeline
	import pytesseract
	from PIL import Image, ImageEnhance, ImageFilter

	# Load the pre-trained summarization model and its tokenizer.
	checkpoint = "facebook/bart-large-cnn"


	@st.cache_resource
	def _load_summarizer(name):
	    """Build the summarization pipeline for checkpoint *name*."""
	    return pipeline('summarization', model=name)


	# Streamlit re-executes this script on every widget interaction; going
	# through the cached loader keeps the pipeline from being rebuilt each time.
	model = _load_summarizer(checkpoint)

	# Streamlit UI
	st.title("Text Summarizer using LLM")

	# Function to extract text from PDF
	def extract_text_from_pdf(pdf_file):
	    """Return the text of every page of *pdf_file*, joined with newlines.

	    Args:
	        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts; the caller in this
	            file passes the object returned by ``st.file_uploader``.

	    Returns:
	        One string with the pages' text in page order. A document with no
	        pages yields an empty string.
	    """
	    pdf_reader = PyPDF2.PdfReader(pdf_file)
	    # Collect every page; previously each page overwrote the last, so only
	    # a single page's text was ever returned. ``or ""`` guards against a
	    # page that yields no text.
	    return "\n".join(page.extract_text() or "" for page in pdf_reader.pages)

	def summarize_text(text):
	    """Summarize *text* with the module-level summarization pipeline.

	    Args:
	        text: The document text to summarize.

	    Returns:
	        The generated summary string, or an empty string when *text* is
	        empty or contains only whitespace. Sampling is enabled
	        (``do_sample=True``), so the summary may differ between calls.
	    """
	    # Nothing to summarize (e.g. OCR found no text): skip the model call.
	    if not text or not text.strip():
	        return ""
	    # truncation=True clips inputs longer than the model's maximum input
	    # length instead of letting the model fail on them.
	    result = model(
	        text,
	        min_length=256,
	        max_length=512,
	        do_sample=True,
	        truncation=True,
	    )
	    return result[0]['summary_text']



	# Function to perform OCR on uploaded image
	def perform_ocr(image):
	    """Run Tesseract OCR on *image* and return the recognized text.

	    The image is handed to Tesseract unchanged (no enhancement or filtering
	    is applied here), using the English language data and the ``--psm 3``
	    page segmentation setting.
	    """
	    ocr_options = {'lang': 'eng', 'config': '--psm 3'}
	    return pytesseract.image_to_string(image, **ocr_options)


	@st.cache_resource
	def _load_question_answerer():
	    """Build the question-answering pipeline once and reuse it across reruns."""
	    return pipeline("question-answering", model='distilbert-base-cased-distilled-squad')


	def answering(tex):
	    """Render a question box and answer the question using *tex* as context.

	    Args:
	        tex: The text the answer is extracted from.

	    Writes the answer to the Streamlit page; returns nothing.
	    """
	    question = st.text_input("Enter your question:")
	    if not st.button("Answer Question"):
	        return

	    # The question-answering pipeline rejects an empty question or context,
	    # so tell the user instead of letting the app crash.
	    if not question.strip() or not tex or not tex.strip():
	        st.warning("Please enter a question and provide some text to search.")
	        return

	    # Generate the answer; the pipeline is only loaded once a question is
	    # actually submitted, and is cached after that.
	    question_answerer = _load_question_answerer()
	    result = question_answerer(question=question, context=tex)

	    # Display the answer
	    st.subheader("Answer:")
	    st.write(result["answer"])

	def _summarize_and_answer(text):
	    """Render the Summarize button, its result, and the Q&A widgets for *text*."""
	    if not text or not text.strip():
	        st.warning("No text could be read from the uploaded file.")
	        return

	    if st.button("Summarize"):
	        # Generate the summary
	        summary = summarize_text(text)

	        # Display the summary
	        st.subheader("Summary:")
	        st.write(summary)

	    # Rendered outside the Summarize branch: a Streamlit button is only True
	    # on the rerun triggered by its click, so widgets nested under it would
	    # vanish as soon as the user interacts with the question box.
	    answering(text)


	# Drop-down for selecting the input format
	input_format = st.selectbox("Select input format:", ('Text', 'PDF', 'Image'))

	if input_format == 'Text':
	    # Plain-text upload
	    uploaded_file = st.file_uploader("Upload a text document (.txt)", type="txt")
	    if uploaded_file is not None:
	        # errors="replace" keeps a file that is not valid UTF-8 from
	        # crashing the app with UnicodeDecodeError.
	        text = uploaded_file.read().decode("utf-8", errors="replace")
	        st.subheader("Original Text:")
	        st.write(text)
	        _summarize_and_answer(text)

	elif input_format == 'PDF':
	    # PDF upload: the text is extracted from the document
	    uploaded_file = st.file_uploader("Upload a PDF document", type="pdf")
	    if uploaded_file is not None:
	        text = extract_text_from_pdf(uploaded_file)
	        st.subheader("Original Text:")
	        st.write(text)
	        _summarize_and_answer(text)

	elif input_format == 'Image':
	    # Image upload: the text is obtained through OCR
	    uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
	    if uploaded_image is not None:
	        image = Image.open(uploaded_image)
	        text = perform_ocr(image)
	        st.subheader("Extracted Text from Image:")
	        st.write(text)
	        _summarize_and_answer(text)