import streamlit as st
import PyPDF2
from transformers import pipeline
import pytesseract
from PIL import Image, ImageEnhance, ImageFilter

# Hugging Face checkpoint used for summarization.
checkpoint = "facebook/bart-large-cnn"


@st.cache_resource
def _load_summarizer():
    """Build the summarization pipeline once and reuse it across reruns."""
    return pipeline('summarization', model=checkpoint)


@st.cache_resource
def _load_question_answerer():
    """Build the question-answering pipeline once and reuse it across reruns."""
    return pipeline("question-answering", model='distilbert-base-cased-distilled-squad')


# Streamlit re-executes this script on every widget interaction; going through
# the cached loader avoids re-instantiating the model each time.
model = _load_summarizer()

# Streamlit UI
st.title("Text Summarizer using LLM")


def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        pdf_file: A path or binary file-like object accepted by
            ``PyPDF2.PdfReader``.

    Returns:
        The concatenated page text; an empty string when the PDF has no pages
        or no extractable text.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    # Collect all pages (not just one), and tolerate pages with no text.
    return "\n".join(page.extract_text() or "" for page in pdf_reader.pages)


def summarize_text(text):
    """Summarize ``text`` with the module-level summarization pipeline.

    Args:
        text: The document text to summarize.

    Returns:
        The generated summary string.
    """
    # truncation=True clips inputs longer than the model's maximum input
    # length instead of letting the pipeline fail on long documents.
    summary = model(
        text,
        min_length=256,
        max_length=512,
        do_sample=True,
        truncation=True,
    )[0]['summary_text']
    return summary


def perform_ocr(image):
    """Run Tesseract OCR on an image and return the recognized text.

    No image pre-processing is applied before recognition.

    Args:
        image: An image object accepted by ``pytesseract.image_to_string``.

    Returns:
        The text recognized in the image.
    """
    text = pytesseract.image_to_string(image, lang='eng', config='--psm 3')
    return text


def answering(tex):
    """Render a question box and answer the question using ``tex`` as context.

    Args:
        tex: The document text used as the question-answering context.
    """
    question_answerer = _load_question_answerer()
    question = st.text_input("Enter your question:")
    if st.button("Answer Question"):
        # Blank questions or contexts are reported to the user instead of
        # being handed to the pipeline.
        if not question.strip() or not tex.strip():
            st.warning("Please enter a question and provide a non-empty document.")
            return
        # Generate the answer
        result = question_answerer(question=question, context=tex)
        # Display the answer
        st.subheader("Answer:")
        st.write(result["answer"])


def _show_text_with_actions(text, heading):
    """Display ``text`` under ``heading`` with the summarize and Q&A controls."""
    st.subheader(heading)
    st.write(text)
    if st.button("Summarize"):
        if text.strip():
            # Generate and display the summary
            summary = summarize_text(text)
            st.subheader("Summary:")
            st.write(summary)
        else:
            st.warning("No text found to summarize.")
    # Rendered outside the "Summarize" branch: a button nested under another
    # button would disappear on the rerun triggered by clicking it.
    answering(text)


# Select box for choosing the input format
input_format = st.selectbox("Select input format:", ('Text', 'PDF', 'Image'))

if input_format == 'Text':
    # Plain-text upload for the document to be summarized
    uploaded_file = st.file_uploader("Upload a text document (.txt)", type="txt")
    if uploaded_file is not None:
        # Replace undecodable bytes rather than crashing on non-UTF-8 files.
        text = uploaded_file.read().decode("utf-8", errors="replace")
        _show_text_with_actions(text, "Original Text:")
elif input_format == 'PDF':
    # PDF upload for the document to be summarized
    uploaded_file = st.file_uploader("Upload a PDF document", type="pdf")
    if uploaded_file is not None:
        text = extract_text_from_pdf(uploaded_file)
        _show_text_with_actions(text, "Original Text:")
elif input_format == 'Image':
    # Image upload; the text is obtained through OCR
    uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    if uploaded_image is not None:
        image = Image.open(uploaded_image)
        text = perform_ocr(image)
        _show_text_with_actions(text, "Extracted Text from Image:")