import streamlit as st
import PyPDF2
from transformers import pipeline
import pytesseract
from PIL import Image, ImageEnhance, ImageFilter

# Load the pre-trained summarization model.
checkpoint = "facebook/bart-large-cnn"


@st.cache_resource
def _load_summarizer(model_name):
    """Build the summarization pipeline for ``model_name``.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Wrapping the construction in ``st.cache_resource`` keeps a
    single pipeline object alive across reruns instead of rebuilding it
    each time.
    """
    return pipeline('summarization', model=model_name)


# Callable summarization pipeline used by summarize_text().
model = _load_summarizer(checkpoint)

# Streamlit UI
st.title("Text Summarizer using LLM")

# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the text of every page in a PDF, joined with newlines.

    Args:
        pdf_file: A path or binary file-like object accepted by
            ``PyPDF2.PdfReader`` (e.g. a Streamlit uploaded file).

    Returns:
        The concatenated text of all pages in document order. An empty
        string is returned when the document has no pages.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    # Collect every page; the earlier implementation overwrote the result on
    # each iteration and therefore only returned the last page. `or ""`
    # guards against a page yielding no text at all.
    page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
    return "\n".join(page_texts)

def summarize_text(text, min_length=256, max_length=512):
    """Summarize ``text`` with the module-level summarization pipeline.

    Args:
        text: The text to summarize.
        min_length: Lower bound passed to the pipeline for the generated
            summary length. Defaults to the previously hard-coded 256.
        max_length: Upper bound passed to the pipeline for the generated
            summary length. Defaults to the previously hard-coded 512.

    Returns:
        The generated summary string, or ``""`` when ``text`` is empty or
        contains only whitespace (the model is not invoked in that case).
    """
    # Nothing to summarize: skip the model call entirely.
    if not text or not text.strip():
        return ""

    # truncation=True asks the pipeline to cut inputs that exceed the
    # model's maximum input length instead of failing on long documents.
    outputs = model(
        text,
        min_length=min_length,
        max_length=max_length,
        do_sample=True,
        truncation=True,
    )
    return outputs[0]['summary_text']


# Function to perform OCR on uploaded image
def perform_ocr(image):
    """Run Tesseract OCR on ``image`` and return the recognized text.

    The image is passed to ``pytesseract.image_to_string`` unchanged, using
    the English language data and page-segmentation mode 3.
    """
    ocr_options = {"lang": 'eng', "config": '--psm 3'}
    return pytesseract.image_to_string(image, **ocr_options)


def answering(tex):
    """Render a question box and answer the question from ``tex``.

    Draws a text input and an "Answer Question" button. When the button is
    pressed with a non-empty question and non-empty context, the extractive
    question-answering model is run and its answer is displayed.

    Args:
        tex: The context text the answer is extracted from.
    """
    question = st.text_input("Enter your question:")
    if not st.button("Answer Question"):
        return

    # The QA pipeline rejects empty questions/contexts, so report the
    # problem to the user instead of letting the call raise.
    if not question or not question.strip():
        st.warning("Please enter a question first.")
        return
    if not tex or not tex.strip():
        st.warning("There is no text to answer the question from.")
        return

    # Build the model only when an answer is actually requested; it was
    # previously constructed on every call, i.e. on every script rerun.
    question_answerer = pipeline("question-answering", model='distilbert-base-cased-distilled-squad')

    # Generate the answer
    result = question_answerer(question=question, context=tex)

    # Display the answer
    st.subheader("Answer:")
    st.write(result["answer"])

def _render_text_tools(text, heading):
    """Show ``text`` under ``heading`` and offer summarization and Q&A.

    Shared by all three input formats so the display / summarize / answer
    sequence is defined once.
    """
    st.subheader(heading)
    st.write(text)

    # Without any text there is nothing to summarize or query.
    if not text or not text.strip():
        st.warning("No text could be extracted from the uploaded file.")
        return

    if st.button("Summarize"):
        # Generate the summary
        summary = summarize_text(text)

        # Display the summary
        st.subheader("Summary:")
        st.write(summary)
    answering(text)


# Select box for choosing the input format
input_format = st.selectbox("Select input format:", ('Text', 'PDF', 'Image'))

if input_format == 'Text':
    # Plain-text upload
    uploaded_file = st.file_uploader("Upload a text document (.txt)", type="txt")
    if uploaded_file is not None:
        # "utf-8-sig" drops a leading byte-order mark if present, and
        # errors="replace" keeps the app running on bytes that are not valid
        # UTF-8 instead of raising UnicodeDecodeError.
        text = uploaded_file.read().decode("utf-8-sig", errors="replace")
        _render_text_tools(text, "Original Text:")

elif input_format == 'PDF':
    # PDF upload
    uploaded_file = st.file_uploader("Upload a PDF document", type="pdf")
    if uploaded_file is not None:
        text = extract_text_from_pdf(uploaded_file)
        _render_text_tools(text, "Original Text:")

elif input_format == 'Image':
    # Image upload; the text is obtained through OCR
    uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    if uploaded_image is not None:
        image = Image.open(uploaded_image)
        text = perform_ocr(image)
        _render_text_tools(text, "Extracted Text from Image:")