File size: 3,599 Bytes
b2b8748
5ab1485
61945ee
5ab1485
61945ee
 
2863900
61945ee
 
b2b8748
 
61945ee
 
 
b2b8748
 
 
61945ee
2863900
61945ee
 
 
 
 
b2b8748
61945ee
b2b8748
428feb9
b05824b
 
 
b2b8748
61945ee
 
758f813
61945ee
758f813
61945ee
 
b05824b
61945ee
 
 
 
 
b2b8748
 
61945ee
 
b2b8748
 
61945ee
 
b2b8748
61945ee
b2b8748
61945ee
 
b2b8748
61945ee
b2b8748
 
61945ee
b2b8748
 
 
758f813
37ff84b
b2b8748
9433d83
61945ee
b2b8748
 
61945ee
 
405bb93
 
 
 
 
 
5ab1485
61945ee
5ab1485
09a49b4
 
 
 
405bb93
 
 
 
d049464
09a49b4
 
 
 
 
d049464
09a49b4
 
d049464
 
b2b8748
61945ee
5ab1485
5d843b7
61945ee
 
5ab1485
6480c28
5ab1485
 
fee9d85
 
af03842
405bb93
 
6480c28
 
5ab1485
6480c28
 
 
 
 
 
 
 
 
 
 
6c822fc
 
6480c28
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import html

import pdfkit
import streamlit as st
from PyPDF2 import PdfReader
from transformers import pipeline

# A decorator can only be applied to a function or class, not to an
# assignment, so the pipeline is built inside a cached loader function.
# NOTE(review): show_spinner=False keeps the cache from rendering a spinner
# element before st.set_page_config runs further down; some Streamlit
# versions reject set_page_config after any element -- confirm against the
# deployed version.
@st.cache_resource(show_spinner=False)
def load_summarizer():
    """Build the summarization pipeline once and reuse it across reruns.

    Returns:
        The object returned by ``pipeline(task="summarization")``.
    """
    return pipeline(task="summarization")


summarizer = load_summarizer()

# Page setup: browser-tab title first, then the visible page heading
st.set_page_config(page_title='Text Summarizer')
st.title('Text Summarization')

# Text summary function

@st.cache_data
def summarize_text(text):
    """Return a summary of ``text`` produced by the module-level ``summarizer``.

    The result is a plain string, so it is cached with ``st.cache_data``
    (keyed on ``text``) rather than ``st.cache_resource``.

    Args:
        text: The text to summarise.

    Returns:
        The ``'summary_text'`` field of the first pipeline result, or an
        empty string when ``text`` is empty or whitespace-only.
    """
    # Nothing to summarise: skip the model call entirely.
    if not text or not text.strip():
        return ''

    # truncation=True asks the pipeline to cut the input down to the model's
    # maximum length instead of failing on over-long text.
    result = summarizer(text, truncation=True)
    return result[0]['summary_text']

# Free-text summary: read the text, summarise it on click, show the result.
# (Named text_to_summarize so the builtin input() is not shadowed.)
text_to_summarize = st.text_area('Enter long text')

if st.button('Summarize text'):
    if not text_to_summarize.strip():
        # Do not send an empty request to the model.
        st.warning('Please enter some text to summarize')
    else:
        with st.spinner('Summarizing'):
            output = summarize_text(text_to_summarize)
            st.success('Summary complete ')

        # The summary is interpolated into raw HTML (unsafe_allow_html=True),
        # so escape it; newlines become <br> so the markup contains no blank
        # line that would end the HTML block in Markdown.
        safe_output = html.escape(output).replace('\n', '<br>')

        st.markdown(
            '<div style="background-color: black; color: white; '
            'font-weight: bold; padding: 1rem; border-radius: 10px;">'
            '<h4>Results</h4>'
            f'<div>{safe_output}</div>'
            '</div>',
            unsafe_allow_html=True,
        )
    
    
    
#####

# PDF summary section

st.subheader('PDF summary')


def extract_text(pdf_file):
    """Extract the text of every page of a PDF.

    Args:
        pdf_file: The PDF source, passed straight to ``PdfReader``.

    Returns:
        A list with one string per page, in page order. A falsy extraction
        result is normalised to ``''`` so every entry is a string.
    """
    reader = PdfReader(pdf_file)
    return [page.extract_text() or '' for page in reader.pages]


# Defined up front so later code can always test it, even if the uploader
# widget itself fails.
uploaded_pdf = None

try:
    # Upload file
    uploaded_pdf = st.file_uploader('Choose a pdf file', type=['pdf'])
except Exception:
    # Narrowed from a bare ``except:``, which would also trap BaseException
    # subclasses such as KeyboardInterrupt and SystemExit.
    st.error('Please select a valid file')
else:
    if uploaded_pdf is not None:
        st.success('Succesfully uploaded')


def check_page_count(pdf):
    """Return the number of pages in a PDF.

    Args:
        pdf: The PDF source, passed straight to ``PdfReader``.

    Returns:
        The length of the reader's ``pages`` collection.
    """
    return len(PdfReader(pdf).pages)
    

# Process to trigger the PDF summary
if st.button('Summarize pdf content'):
    if uploaded_pdf is None:
        # Clicking before a file is chosen would otherwise hand None to
        # PdfReader.
        st.error('Please select a valid file')
    else:
        with st.spinner('Extracting text from PDF...'):
            pdf_input = extract_text(uploaded_pdf)
            st.success('Text extracted')

        num_of_pages = check_page_count(uploaded_pdf)
        st.success(f'NUmber of pages is {num_of_pages}.')

        with st.spinner('Summarizing extracted text...'):
            # Keep the *summary* of each page (not the page text itself) and
            # skip pages that yielded no text.
            pdf_output = [
                summarize_text(page_text)
                for page_text in pdf_input
                if page_text and page_text.strip()
            ]
            pdf_summary = '\n\n'.join(pdf_output)
            st.success('Summary complete')

        if pdf_output:
            # One <p> per page summary. The text is escaped because it is
            # interpolated into raw HTML, and the markup is kept free of
            # blank lines and leading indentation so Markdown treats it as a
            # single HTML block.
            paragraphs = ''.join(
                '<p>' + html.escape(page_summary).replace('\n', '<br>') + '</p>'
                for page_summary in pdf_output
            )

            st.markdown(
                '<div style="background-color: black; color: white; '
                'font-weight: bold; padding: 1rem; border-radius: 10px;">'
                '<h4>Summary </h4>'
                f'{paragraphs}'
                '</div>',
                unsafe_allow_html=True,
            )

            st.success('PDF page summarized :)', icon="✅")
        else:
            st.warning('No extractable text was found in this PDF')
    
    
    # if st.button('Generate pdf download link'):
    #     download_button = st.download_button(label='Download summary PDF', data=pdf_summary, file_name='summary.pdf', mime='application/pdf')   
        

  

# Two empty writes for vertical spacing ahead of the footer
for _ in range(2):
    st.write('')

# Dashed horizontal rule separating the app from the credits
st.markdown(
    "<hr style='border: 1px dashed #ddd; margin: 2rem;'>",
    unsafe_allow_html=True,
)

# Credits: project author link and resources link
footer_html = """
    <div style="text-align: center; padding: 1rem;">
        Project by <a href="https://github.com/ChibuzoKelechi" target="_blank" style="color: white; font-weight: bold; text-decoration: none;">
         kelechi_tensor</a>
    </div>
    
    <div style="text-align: center; padding: 1rem;">
        Resources <a href="https://huggingface.co" target="_blank" style="color: white; font-weight: bold; text-decoration: none;">
         Hugging face</a>
    </div>
"""
st.markdown(footer_html, unsafe_allow_html=True)