Spaces:
Runtime error
Runtime error
File size: 3,853 Bytes
c42ad4e 0d406fd 4f91ef7 5c37675 c42ad4e 4f91ef7 c141399 0d406fd 486c2a5 0d406fd 274b11e c42ad4e cfb8ed0 c42ad4e 0d406fd c42ad4e 0d406fd 1e17bf9 0d406fd 1e17bf9 4f91ef7 0d406fd e8ec388 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
import os
import tempfile

import PyPDF2
import requests
import streamlit as st
from streamlit.components.v1 import html
from transformers import pipeline
def get_pdf_text(pdf_path):
    """Extract the text of every page of a PDF and join the pages with spaces.

    Args:
        pdf_path: Filesystem path of the PDF file to read.

    Returns:
        The text of all pages, in page order, separated by single spaces.
        A page that yields no text contributes an empty string.

    Raises:
        OSError: If the file cannot be opened.
    """
    # The context manager closes the file even when parsing or text
    # extraction raises, so a malformed PDF cannot leak the handle.
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Extraction has to happen while the file is still open.
        # `or ""` keeps the join safe should a page yield None.
        return " ".join(page.extract_text() or "" for page in pdf_reader.pages)
# Local text2text-generation pipeline, created when this statement runs.
# The app uses it to summarize when the hosted request yields no summary.
# NOTE(review): Streamlit re-executes the script on user interaction, so this
# presumably reloads the model on every run — consider caching; confirm.
sum_model = pipeline("text2text-generation", model="yasminesarraj/flan-t5-small-samsum")
# Headers sent with both hosted-inference requests below. The Authorization
# value is taken verbatim from the Streamlit secret "HF_AUTH".
# NOTE(review): presumably the secret already holds the complete
# "Bearer <token>" string — confirm against the deployment's secrets.
headers = {"Authorization": st.secrets["HF_AUTH"]}
def create_tags(payload, timeout=60):
    """Request topic tags for a text from the hosted tag-generation model.

    Args:
        payload: JSON-serializable request body, e.g. ``{"inputs": text}``.
        timeout: Seconds to wait for the server before giving up, so a
            stalled endpoint cannot block the app indefinitely.

    Returns:
        The decoded JSON body of the response. The HTTP status is not
        checked, so the caller must validate the shape of the result.

    Raises:
        requests.RequestException: On connection failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    API_URL_TAGS = "https://api-inference.huggingface.co/models/fabiochiu/t5-base-tag-generation"
    response = requests.post(API_URL_TAGS, headers=headers, json=payload, timeout=timeout)
    return response.json()
def summarize_text(payload, timeout=60):
    """Request a summary of a text from the hosted summarization model.

    Args:
        payload: JSON-serializable request body, e.g. ``{"inputs": text}``.
        timeout: Seconds to wait for the server before giving up, so a
            stalled endpoint cannot block the app indefinitely.

    Returns:
        The decoded JSON body of the response. The HTTP status is not
        checked, so the caller must validate the shape of the result.

    Raises:
        requests.RequestException: On connection failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    API_URL = "https://api-inference.huggingface.co/models/yasminesarraj/flan-t5-small-samsum"
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    return response.json()
# Start of the app code


def _first_text(result, keys):
    """Return the value under the first matching key of ``result[0]``.

    Args:
        result: Decoded model output, expected to be a list whose first
            item is a dict.
        keys: Candidate dict keys, tried in order.

    Returns:
        The value stored under the first key that is present, or None when
        ``result`` does not have the expected shape (e.g. it is a dict
        rather than a list, or the list is empty).
    """
    try:
        first = result[0]
    except (KeyError, IndexError, TypeError):
        return None
    if not isinstance(first, dict):
        return None
    for key in keys:
        if key in first:
            return first[key]
    return None


tab_your_paper, tab_general_topics = st.tabs(["Summarize your paper(s)", "Research topics"])

with tab_your_paper:
    html("", height=10)
    st.markdown("\n### Simply upload one or multiple PDFs and we summarize the content for you!\n")
    pdf_files = st.file_uploader(
        "Upload your paper as a pdf",
        type=[".pdf"],
        accept_multiple_files=True,
        help="You can summarize one or also multiple papers at once. The file format needs to be a pdf.",
    )

    if pdf_files:
        pdfs_content_list = []
        # Spool each upload into a throwaway directory under a generated,
        # unique name: the client-supplied file name is never used as a path,
        # uploads sharing a name cannot overwrite each other, and everything
        # is removed even when a PDF fails to parse.
        with tempfile.TemporaryDirectory() as upload_dir:
            for index, pdf in enumerate(pdf_files):
                pdf_path = os.path.join(upload_dir, "upload_{}.pdf".format(index))
                with open(pdf_path, "wb") as f:
                    f.write(pdf.getvalue())
                pdfs_content_list.append(get_pdf_text(pdf_path))

        all_text_together = " ".join(pdfs_content_list)
        several_papers = len(pdf_files) > 1

        # Tags are optional: on a network error, a non-JSON body, or an
        # unexpected response shape the tag column is simply left out.
        try:
            tags = _first_text(create_tags({"inputs": all_text_together}), ("generated_text",))
        except (requests.RequestException, ValueError):
            tags = None

        # NOTE(review): the same model is loaded locally as a
        # text2text-generation pipeline, whose output key is "generated_text";
        # accepting both keys keeps the hosted result usable either way —
        # confirm which key the endpoint actually returns.
        try:
            summary = _first_text(
                summarize_text({"inputs": all_text_together}),
                ("summary_text", "generated_text"),
            )
        except (requests.RequestException, ValueError):
            summary = None

        if summary is None:
            # Hosted summarization yielded nothing usable: fall back to the
            # local model and show its text rather than the raw result list.
            local_result = sum_model(all_text_together)
            summary = _first_text(local_result, ("generated_text", "summary_text"))
            if summary is None:
                # Unknown output shape: show it unmodified.
                summary = local_result

        col1, col2 = st.columns(2)

        with col1:
            if several_papers:
                st.markdown("#### Summary of your paper(s):")
            else:
                st.markdown("#### Summary of your paper:")
            st.write(summary)

        if tags is not None:
            with col2:
                if several_papers:
                    st.markdown("#### Identified topics of your paper(s):")
                else:
                    st.markdown("#### Identified topics of your paper:")
                st.write(tags)

        with st.expander("See your total text"):
            st.write(all_text_together)

with tab_general_topics:
    html("", height=10)
    st.header("See the status of a research topic through a summary of the most cited papers")
    st.selectbox("Select a research topic", ["Artificial Intelligence", "Sustainability", "Cooking"])