File size: 1,943 Bytes
4baa579
a653348
d239c1e
 
 
4baa579
1fb96d0
d239c1e
4baa579
d239c1e
 
1fb96d0
a653348
 
 
 
 
 
 
d239c1e
 
 
 
 
1fb96d0
 
d239c1e
 
a653348
d239c1e
 
 
 
 
 
 
2135ce2
d239c1e
a653348
 
d239c1e
a653348
445e71a
1fb96d0
 
d239c1e
a653348
 
1fb96d0
a653348
 
 
 
39111d4
a653348
 
d239c1e
39111d4
d239c1e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import streamlit as st
import fitz  # PyMuPDF
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import pipeline

# Load the tokenizer and the seq2seq model once at import time; both are read
# from the same local checkpoint directory and shared by the code below.
checkpoint = "./model/LaMini-Flan-T5-248M"
tokenizer, base_model = (
    T5Tokenizer.from_pretrained(checkpoint),
    T5ForConditionalGeneration.from_pretrained(checkpoint),
)

# LLM pipeline
def llm_pipeline(pdf_contents, max_length=500, min_length=50):
    """Summarize the text of a PDF with the module-level T5 model.

    Args:
        pdf_contents: Raw PDF data, handed to ``fitz.open`` as ``stream``.
        max_length: Passed to the summarization pipeline as ``max_length``.
        min_length: Passed to the summarization pipeline as ``min_length``.

    Returns:
        The ``summary_text`` of the first pipeline result, or an empty string
        when no text could be extracted from the PDF.
    """
    # Open the document as a context manager so it is closed even if text
    # extraction raises; join the page texts instead of repeated ``+=``.
    with fitz.open(stream=pdf_contents, filetype="pdf") as pdf_document:
        pdf_text = "".join(page.get_text() for page in pdf_document)

    # With nothing to summarize (e.g. a PDF without a text layer), skip the
    # model call rather than generating a summary from empty input.
    if not pdf_text.strip():
        return ""

    # Build the summarization pipeline around the already-loaded model and
    # tokenizer, applying the caller's length bounds.
    pipe_sum = pipeline(
        'summarization',
        model=base_model,
        tokenizer=tokenizer,
        max_length=max_length,
        min_length=min_length,
    )

    result = pipe_sum(pdf_text)
    return result[0]['summary_text']

# Streamlit code
# Use the wide page layout for the app.
# NOTE(review): Streamlit documents set_page_config as needing to be the first
# Streamlit command in the script — confirm before adding st.* calls above it.
st.set_page_config(layout="wide")

def main():
    """Render the app: upload a PDF, choose length bounds, show its summary.

    Nothing beyond the title and uploader is drawn until a file is uploaded,
    and the summary is only produced after the "Summarize" button is pressed.
    """
    st.title("Document Summarization using a Language Model")

    # Button to upload a PDF file
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    if uploaded_file is None:
        return

    max_length = st.slider("Maximum Summary Length", min_value=50, max_value=1000, step=20, value=500)
    min_length = st.slider("Minimum Summary Length", min_value=10, max_value=500, step=10, value=50)

    if not st.button("Summarize"):
        return

    # Check if the uploaded file is a PDF
    if uploaded_file.type != "application/pdf":
        st.error("Please upload a valid PDF file.")
        return

    # The two sliders are independent, so the minimum can be set above the
    # maximum; reject that combination instead of passing it to generation.
    if min_length > max_length:
        st.error("Minimum summary length cannot exceed the maximum summary length.")
        return

    # getvalue() returns the whole buffer regardless of the current stream
    # position, unlike read(), which returns nothing once the stream is consumed.
    summary = llm_pipeline(uploaded_file.getvalue(), max_length, min_length)

    if not summary:
        st.warning("No summary could be generated; the PDF may contain no extractable text.")
        return

    # Display the summary
    st.info("Summarization Complete")
    st.success(summary)


# Start the app only when this file is executed as the main script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()