# app.py -- NBayer, "Update app.py" (commit 4f91ef7), 3.85 kB
import os
import tempfile

import PyPDF2
import requests
import streamlit as st
from streamlit.components.v1 import html
from transformers import pipeline
def get_pdf_text(pdf_path):
    """Return the text of every page of a PDF, joined by single spaces.

    Args:
        pdf_path: Filesystem path of the PDF file to read.

    Returns:
        One string containing the extracted text of all pages in page order,
        separated by a single space. An empty string for a PDF without pages.

    Raises:
        OSError: If the file cannot be opened.
        Whatever ``PyPDF2.PdfReader`` / ``extract_text`` raise for unreadable
        PDFs is propagated unchanged.
    """
    # The context manager closes the file even when parsing raises, which the
    # previous manual open()/close() pair did not.
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # `or ""` keeps the join below safe should a page yield no string
        # (presumably only image-only pages on some PyPDF2 versions -- verify).
        page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
    return " ".join(page_texts)
# Local text2text-generation pipeline; the page code calls it on the uploaded
# text when the hosted summarization request does not yield a summary.
# NOTE(review): Streamlit re-executes the script on each interaction, so this
# line runs again every time -- consider caching the pipeline.
sum_model = pipeline(
    task="text2text-generation",
    model="yasminesarraj/flan-t5-small-samsum",
)

# Headers sent with every request made by create_tags() and summarize_text();
# the Authorization value is taken verbatim from the "HF_AUTH" Streamlit secret.
headers = dict(Authorization=st.secrets["HF_AUTH"])
def create_tags(payload, timeout=60):
    """POST ``payload`` to the hosted tag-generation model and return its reply.

    Args:
        payload: JSON-serialisable request body; the page code sends
            ``{"inputs": <text>}``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would freeze the
            Streamlit script on a stalled connection.

    Returns:
        The decoded JSON body of the response. The HTTP status is not checked,
        so an error reply is returned as parsed JSON as well; the caller
        indexes ``[0]["generated_text"]`` and treats any failure as "no tags".

    Raises:
        requests.exceptions.RequestException: On connection problems or when
            ``timeout`` is exceeded.
        ValueError: If the response body is not valid JSON.
    """
    API_URL_TAGS = "https://api-inference.huggingface.co/models/fabiochiu/t5-base-tag-generation"
    response = requests.post(API_URL_TAGS, headers=headers, json=payload, timeout=timeout)
    return response.json()
def summarize_text(payload, timeout=60):
    """POST ``payload`` to the hosted summarization model and return its reply.

    Args:
        payload: JSON-serialisable request body; the page code sends
            ``{"inputs": <text>}``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would freeze the
            Streamlit script on a stalled connection.

    Returns:
        The decoded JSON body of the response. The HTTP status is not checked,
        so an error reply is returned as parsed JSON as well; the caller
        indexes ``[0]["summary_text"]`` and falls back to the local model on
        any failure.

    Raises:
        requests.exceptions.RequestException: On connection problems or when
            ``timeout`` is exceeded.
        ValueError: If the response body is not valid JSON.
    """
    API_URL = "https://api-inference.huggingface.co/models/yasminesarraj/flan-t5-small-samsum"
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    return response.json()
# Start of the app code


def _read_uploaded_pdfs(uploaded_files):
    """Return the extracted text of each uploaded PDF, in upload order.

    ``get_pdf_text`` needs a filesystem path, so every upload is written into
    a private temporary directory. The directory and its files are removed on
    exit, including when reading a PDF raises.
    """
    texts = []
    with tempfile.TemporaryDirectory() as tmp_dir:
        for index, uploaded in enumerate(uploaded_files):
            # Generated name: the client-supplied file name is never used as a
            # path, so an upload cannot overwrite (and later delete) a file
            # that already exists next to the app.
            pdf_path = os.path.join(tmp_dir, f"upload_{index}.pdf")
            with open(pdf_path, "wb") as f:
                f.write(uploaded.getvalue())
            texts.append(get_pdf_text(pdf_path))
    return texts


tab_your_paper, tab_general_topics = st.tabs(["Summarize your paper(s)", "Research topics"])

with tab_your_paper:
    # Empty HTML component, 10 px high, placed above the tab content.
    html("", height=10)
    st.markdown("""
### Simply upload one or multiple PDFs and we summarize the content for you!
""")
    pdf_files = st.file_uploader(
        "Upload your paper as a pdf",
        type=[".pdf"],
        accept_multiple_files=True,
        help="You can summarize one or also multiple papers at once. The file format needs to be a pdf.",
    )

    if pdf_files:
        # All uploads are summarized and tagged together as one text.
        all_text_together = " ".join(_read_uploaded_pdfs(pdf_files))
        several_papers = len(pdf_files) > 1

        # Both hosted calls are best effort: any failure (network error,
        # error payload without the expected key, ...) only disables the
        # corresponding output. `except Exception` replaces a bare `except:`
        # so that BaseException subclasses are no longer swallowed.
        try:
            tags = create_tags({
                "inputs": all_text_together,
            })[0]["generated_text"]
            tags_available = True
        except Exception:
            tags_available = False

        try:
            summary = summarize_text({
                "inputs": all_text_together
            })[0]["summary_text"]
            sum_available = True
        except Exception:
            sum_available = False

        col1, col2 = st.columns(2)

        with col1:
            if sum_available:
                if several_papers:
                    st.markdown("#### Summary of your paper(s):")
                else:
                    st.markdown("#### Summary of your paper:")
                st.write(summary)
            else:
                # Hosted summary unavailable: show the local pipeline's output.
                st.write(sum_model(all_text_together))

        if tags_available:
            with col2:
                if several_papers:
                    st.markdown("#### Identified topics of your paper(s):")
                else:
                    st.markdown("#### Identified topics of your paper:")
                st.write(tags)

        with st.expander("See your total text"):
            st.write(all_text_together)
with tab_general_topics:
    # Empty HTML component, 10 px high, placed above the tab content.
    html("", height=10)
    st.header("See the status of a research topic through a summary of the most cited papers")
    research_topics = ["Artificial Intelligence", "Sustainability", "Cooking"]
    # The chosen topic is not read anywhere yet; only the widget is rendered.
    st.selectbox("Select a research topic", research_topics)