Spaces:
Runtime error
Runtime error
File size: 3,599 Bytes
b2b8748 5ab1485 61945ee 5ab1485 61945ee 2863900 61945ee b2b8748 61945ee b2b8748 61945ee 2863900 61945ee b2b8748 61945ee b2b8748 428feb9 b05824b b2b8748 61945ee 758f813 61945ee 758f813 61945ee b05824b 61945ee b2b8748 61945ee b2b8748 61945ee b2b8748 61945ee b2b8748 61945ee b2b8748 61945ee b2b8748 61945ee b2b8748 758f813 37ff84b b2b8748 9433d83 61945ee b2b8748 61945ee 405bb93 5ab1485 61945ee 5ab1485 09a49b4 405bb93 d049464 09a49b4 d049464 09a49b4 d049464 b2b8748 61945ee 5ab1485 5d843b7 61945ee 5ab1485 6480c28 5ab1485 fee9d85 af03842 405bb93 6480c28 5ab1485 6480c28 6c822fc 6480c28 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
import html

import pdfkit
import streamlit as st
from PyPDF2 import PdfReader
from transformers import pipeline
@st.cache_resource(show_spinner=False)
def load_summarizer():
    """Build the Hugging Face summarization pipeline.

    A decorator can only be applied to a function or class, so the pipeline
    construction lives in this loader; ``st.cache_resource`` then hands the
    same pipeline object back on every script rerun instead of rebuilding it.

    ``show_spinner=False`` keeps the cached call from drawing a spinner, so
    no UI element is emitted before ``st.set_page_config`` runs further down
    the script.

    Returns:
        The object returned by ``pipeline(task="summarization")``.
    """
    return pipeline(task="summarization")


# Module-level handle used by the rest of the script.
summarizer = load_summarizer()
# Page setup: browser-tab title first, then the visible page heading.
st.set_page_config(page_title='Text Summarizer')
st.title('Text Summarization')
# Text summary function
@st.cache_data
def summarize_text(text):
    """Return a model-generated summary of ``text``.

    The result is a plain string, so it is cached with ``st.cache_data``
    (keyed on ``text``) rather than ``st.cache_resource``, which is meant for
    shared objects such as the model itself.

    Args:
        text: Plain text to summarize.

    Returns:
        The ``summary_text`` field of the first pipeline result, or an empty
        string when ``text`` is empty or whitespace-only.
    """
    # Nothing to summarize: skip the model call entirely.
    if not text or not text.strip():
        return ''
    # truncation=True asks the pipeline to clip over-long input to the
    # model's maximum length rather than passing it through unmodified.
    results = summarizer(text, truncation=True)
    return results[0]['summary_text']
# Free-text summary section. The value is stored as `source_text` so the
# builtin `input` is not shadowed.
source_text = st.text_area('Enter long text')
if st.button('Summarize text'):
    if not (source_text or '').strip():
        # Don't send an empty prompt to the model.
        st.warning('Please enter some text to summarize')
    else:
        with st.spinner('Summarizing'):
            output = summarize_text(source_text)
            st.success('Summary complete ')
        # The summary is derived from user-supplied text and is rendered
        # with unsafe_allow_html=True, so escape it before interpolating.
        safe_output = html.escape(output)
        st.markdown(f'''
<div style="background-color: black; color: white; font-weight: bold; padding: 1rem; border-radius: 10px;">
<h4>Results</h4>
<div>
{safe_output}
</div>
</div>
''', unsafe_allow_html=True)
#####
# PDF summary section
st.subheader('PDF summary')


def extract_text(pdf_file):
    """Extract the text of every page of a PDF.

    Args:
        pdf_file: Anything ``PdfReader`` accepts; in this script it is the
            object returned by ``st.file_uploader``.

    Returns:
        A list with one entry per page, in page order, holding whatever
        ``page.extract_text()`` returned for that page.
    """
    reader = PdfReader(pdf_file)
    return [page.extract_text() for page in reader.pages]


# Upload file. `uploaded_pdf` stays None until the user picks a file.
uploaded_pdf = st.file_uploader('Choose a pdf file', type=['pdf'])
if uploaded_pdf is not None:
    st.success('Succesfully uploaded')
def check_page_count(pdf):
    """Return the number of pages ``PdfReader`` finds in ``pdf``."""
    return len(PdfReader(pdf).pages)
# Process to trigger summary
if st.button('Summarize pdf content'):
    if uploaded_pdf is None:
        # Button pressed before any file was chosen: report it instead of
        # handing None to the PDF reader.
        st.error('Please select a valid file')
    else:
        with st.spinner('Extracting text from PDF...'):
            pdf_input = extract_text(uploaded_pdf)
            st.success('Text extracted')
            num_of_pages = check_page_count(uploaded_pdf)
            st.success(f'NUmber of pages is {num_of_pages}.')
        with st.spinner('Summarizing extracted text...'):
            # Collect the summary of each page (not the raw page text), and
            # skip pages that produced no extractable text.
            pdf_output = [
                summarize_text(page_text)
                for page_text in pdf_input
                if page_text and page_text.strip()
            ]
            pdf_summary = '\n\n'.join(pdf_output)
            st.success('Summary complete')
        # The text originates from an uploaded file and is rendered with
        # unsafe_allow_html=True, so escape it before interpolating.
        safe_summary = html.escape(pdf_summary)
        st.markdown(f'''
<div style="background-color: black; color: white; font-weight: bold; padding: 1rem; border-radius: 10px;">
<h4>Summary </h4>
<p>{safe_summary}</p>
</div>
''', unsafe_allow_html=True)
        st.success('PDF page summarized :)', icon="✅")
        # TODO: offer the summary as a download; the plain-text summary would
        # first have to be rendered to real PDF bytes for mime 'application/pdf'.
# Footer: two empty writes for vertical spacing, a dashed rule, then credits.
for _ in range(2):
    st.write('')

# Horizontal line
_footer_rule = "<hr style='border: 1px dashed #ddd; margin: 2rem;'>"
st.markdown(_footer_rule, unsafe_allow_html=True)

# Author and resource links, each in its own centered row.
_footer_links = """
<div style="text-align: center; padding: 1rem;">
Project by <a href="https://github.com/ChibuzoKelechi" target="_blank" style="color: white; font-weight: bold; text-decoration: none;">
kelechi_tensor</a>
</div>
<div style="text-align: center; padding: 1rem;">
Resources <a href="https://huggingface.co" target="_blank" style="color: white; font-weight: bold; text-decoration: none;">
Hugging face</a>
</div>
"""
st.markdown(_footer_links, unsafe_allow_html=True)
|