Spaces:

NBayer
/

Streamlit_app_paper

Runtime error

File size: 3,853 Bytes

import os
import tempfile

import PyPDF2
import requests
import streamlit as st
from streamlit.components.v1 import html
from transformers import pipeline

def get_pdf_text(pdf_path):
    """Return the text of every page of a PDF, joined by single spaces.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        One string containing the extracted text of all pages in page
        order, with a single space between consecutive pages.

    Raises:
        OSError: If the file cannot be opened.
        Exception: Whatever PyPDF2 raises for a file it cannot parse.
    """
    # The context manager closes the file even when parsing or extraction
    # fails. Extraction stays inside the block because the reader works on
    # the open file object.
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        return " ".join(page.extract_text() for page in pdf_reader.pages)

# Local text2text-generation pipeline. The app calls it on the full extracted
# text when the hosted summarization request does not yield a summary.
# NOTE(review): this is built at module import time; Streamlit presumably
# re-executes the script on every interaction, so the model may be reloaded
# on each rerun -- consider caching it; confirm against the deployed version.
sum_model = pipeline("text2text-generation", model="yasminesarraj/flan-t5-small-samsum")

# HTTP headers sent with both Hugging Face Inference API requests in this file.
# NOTE(review): the secret is used verbatim as the Authorization value, so it
# presumably already contains the full "Bearer <token>" string -- confirm.
headers = {"Authorization": st.secrets["HF_AUTH"]}

def create_tags(payload, timeout=60):
    """Ask the hosted tag-generation model for topic tags.

    Args:
        payload: JSON-serializable request body; the app sends
            ``{"inputs": <text>}``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script run
            indefinitely.

    Returns:
        The decoded JSON body of the response, whatever its shape. The
        caller indexes it as ``[0]["generated_text"]``.
        NOTE(review): error responses are presumably returned as a dict
        rather than a list -- confirm against the Inference API.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
        ValueError: If the response body is not valid JSON.
    """
    API_URL_TAGS = "https://api-inference.huggingface.co/models/fabiochiu/t5-base-tag-generation"

    response = requests.post(API_URL_TAGS, headers=headers, json=payload, timeout=timeout)
    return response.json()

def summarize_text(payload, timeout=60):
    """Ask the hosted summarization model for a summary.

    Args:
        payload: JSON-serializable request body; the app sends
            ``{"inputs": <text>}``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script run
            indefinitely.

    Returns:
        The decoded JSON body of the response, whatever its shape. The
        caller indexes it as ``[0]["summary_text"]``.
        NOTE(review): error responses are presumably returned as a dict
        rather than a list -- confirm against the Inference API.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
        ValueError: If the response body is not valid JSON.
    """
    API_URL = "https://api-inference.huggingface.co/models/yasminesarraj/flan-t5-small-samsum"
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    return response.json()


# Start of the app code

tab_your_paper, tab_general_topics = st.tabs(["Summarize your paper(s)", "Research topics"])

with tab_your_paper:
    # Empty HTML component, 10 px high, rendered at the top of the tab.
    html("", height=10)

    st.markdown("""
### Simply upload one or multiple PDFs and we summarize the content for you!
    """)

    pdf_files = st.file_uploader(
        "Upload your paper as a pdf",
        type=[".pdf"],
        accept_multiple_files=True,
        help="You can summarize one or also multiple papers at once. The file format needs to be a pdf.",
    )
    if pdf_files:
        # get_pdf_text() reads from a path, so every upload is staged on disk
        # first. A private temporary directory with generated file names is
        # used instead of the client-supplied name in the working directory:
        # an upload can then neither overwrite (and later delete) an existing
        # file nor collide with another upload of the same name, and the
        # staged copies are removed even when text extraction raises.
        pdfs_content_list = []
        with tempfile.TemporaryDirectory() as staging_dir:
            for index, pdf in enumerate(pdf_files):
                pdf_path = os.path.join(staging_dir, f"upload_{index}.pdf")
                with open(pdf_path, "wb") as f:
                    f.write(pdf.getvalue())
                pdfs_content_list.append(get_pdf_text(pdf_path))

        all_text_together = " ".join(pdfs_content_list)

        # Both hosted calls are best effort: a failed request or a response
        # that lacks the expected key only disables the corresponding output.
        # `Exception` is caught rather than using a bare `except` so that
        # KeyboardInterrupt, SystemExit and other BaseException-based control
        # flow is not swallowed.
        try:
            tags = create_tags({
                "inputs": all_text_together,
            })[0]["generated_text"]
            tags_available = True
        except Exception:
            tags_available = False

        # NOTE(review): the same model is loaded locally as a
        # "text2text-generation" pipeline; if the hosted endpoint answers with
        # "generated_text" instead of "summary_text", this lookup always fails
        # and the local model below is always used -- confirm the response key.
        try:
            summary = summarize_text({
                "inputs": all_text_together
            })[0]["summary_text"]
            sum_available = True
        except Exception:
            sum_available = False

        # Headings use the plural form only when several files were uploaded.
        paper_label = "paper(s)" if len(pdf_files) > 1 else "paper"

        col1, col2 = st.columns(2)
        with col1:
            if sum_available:
                st.markdown(f"#### Summary of your {paper_label}:")
                st.write(summary)
            else:
                # Fall back to the local pipeline; its raw output is written as is.
                st.write(sum_model(all_text_together))

        if tags_available:
            with col2:
                st.markdown(f"#### Identified topics of your {paper_label}:")
                st.write(tags)

        with st.expander("See your total text"):
            st.write(all_text_together)


with tab_general_topics:
    # Empty HTML component, 10 px high, rendered at the top of the tab.
    html("", height=10)

    st.header(
        "See the status of a research topic through a summary of the most cited papers"
    )

    # The selected value is not used yet; the widget is only displayed.
    st.selectbox(
        label="Select a research topic",
        options=["Artificial Intelligence", "Sustainability", "Cooking"],
    )