Spaces:
Runtime error
Runtime error
import os
import tempfile

import PyPDF2
import requests
import streamlit as st
from streamlit.components.v1 import html
from transformers import pipeline
def get_pdf_text(pdf_path):
    """Return the text of every page of a PDF, joined by single spaces.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        One string containing the extracted text of all pages, in page
        order, separated by a single space.
    """
    # The context manager closes the file even when PyPDF2 raises while
    # parsing a damaged PDF (the manual open()/close() pair leaked it).
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # `or ""` keeps the join safe should a page yield no text at all.
        page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
    return " ".join(page_texts)
# Local text2text-generation pipeline; the app code uses it as a fallback
# when the hosted summarization request does not produce a usable summary.
sum_model = pipeline(
    "text2text-generation",
    model="yasminesarraj/flan-t5-small-samsum",
)

# Header sent with every request to the hosted inference endpoints. The value
# is taken verbatim from the Streamlit secret "HF_AUTH".
# NOTE(review): presumably the secret already contains the "Bearer " prefix
# the endpoint expects - confirm against the deployment's secrets.
headers = dict(Authorization=st.secrets["HF_AUTH"])
def create_tags(payload, timeout=60):
    """Request topic tags for a text from the hosted tag-generation model.

    Args:
        payload: JSON-serialisable request body; the app sends
            ``{"inputs": <text>}``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the app indefinitely.

    Returns:
        The decoded JSON body of the response, returned as-is (including
        error payloads; the HTTP status is not checked here).

    Raises:
        requests.exceptions.RequestException: On connection problems or when
            the timeout expires.
        ValueError: If the response body is not valid JSON.
    """
    API_URL_TAGS = "https://api-inference.huggingface.co/models/fabiochiu/t5-base-tag-generation"
    response = requests.post(
        API_URL_TAGS,
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    return response.json()
def summarize_text(payload, timeout=60):
    """Request a summary of a text from the hosted summarization model.

    Args:
        payload: JSON-serialisable request body; the app sends
            ``{"inputs": <text>}``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the app indefinitely.

    Returns:
        The decoded JSON body of the response, returned as-is (including
        error payloads; the HTTP status is not checked here).

    Raises:
        requests.exceptions.RequestException: On connection problems or when
            the timeout expires.
        ValueError: If the response body is not valid JSON.
    """
    API_URL = "https://api-inference.huggingface.co/models/yasminesarraj/flan-t5-small-samsum"
    response = requests.post(
        API_URL,
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    return response.json()
# Start of the app code
tab_your_paper, tab_general_topics = st.tabs(["Summarize your paper(s)", "Research topics"])

with tab_your_paper:
    # Empty HTML component, used only as a small vertical spacer.
    html("", height=10)
    st.markdown("""
    ### Simply upload one or multiple PDFs and we summarize the content for you!
    """)
    pdf_files = st.file_uploader(
        "Upload your paper as a pdf",
        type=[".pdf"],
        accept_multiple_files=True,
        help="You can summarize one or also multiple papers at once. The file format needs to be a pdf.",
    )

    if pdf_files:
        # get_pdf_text() reads from a path, so each upload is staged on disk
        # first. A private temporary directory with generated file names is
        # used instead of the client-supplied name in the working directory:
        # this cannot overwrite existing files, cannot collide with another
        # session uploading the same name, and is cleaned up even when a PDF
        # fails to parse.
        pdfs_content_list = []
        with tempfile.TemporaryDirectory() as upload_dir:
            for index, pdf in enumerate(pdf_files):
                pdf_path = os.path.join(upload_dir, f"upload_{index}.pdf")
                with open(pdf_path, "wb") as f:
                    f.write(pdf.getvalue())
                pdfs_content_list.append(get_pdf_text(pdf_path))

        all_text_together = " ".join(pdfs_content_list)
        several_papers = len(pdf_files) > 1

        # Tags are optional: any failure (network error, error payload without
        # the expected shape, ...) just hides the topics column.
        try:
            tags = create_tags({
                "inputs": all_text_together,
            })[0]["generated_text"]
            tags_available = True
        except Exception:
            tags_available = False

        # Prefer the hosted model for the summary; if the request fails or the
        # response has an unexpected shape, fall back to the local pipeline.
        try:
            api_result = summarize_text({
                "inputs": all_text_together
            })[0]
            # The output key depends on the task the endpoint runs the model
            # as ("summary_text" for summarization, "generated_text" for
            # text2text-generation), so accept both.
            if "summary_text" in api_result:
                summary = api_result["summary_text"]
            else:
                summary = api_result["generated_text"]
        except Exception:
            # The text2text-generation pipeline returns a list of dicts;
            # show the generated text rather than the raw structure.
            summary = sum_model(all_text_together)[0]["generated_text"]

        col1, col2 = st.columns(2)

        with col1:
            if several_papers:
                st.markdown("#### Summary of your paper(s):")
            else:
                st.markdown("#### Summary of your paper:")
            st.write(summary)

        if tags_available:
            with col2:
                if several_papers:
                    st.markdown("#### Identified topics of your paper(s):")
                else:
                    st.markdown("#### Identified topics of your paper:")
                st.write(tags)

        with st.expander("See your total text"):
            st.write(all_text_together)

with tab_general_topics:
    # Empty HTML component, used only as a small vertical spacer.
    html("", height=10)
    st.header("See the status of a research topic through a summary of the most cited papers")
    # The selection is not used yet; this tab only shows the topic picker.
    st.selectbox("Select a research topic", ["Artificial Intelligence", "Sustainability", "Cooking"])