# policy_test / app.py
import streamlit as st
st.set_page_config('SDSN x GIZ Policy Tracing', layout="wide")  # must be the first Streamlit call
import seaborn as sns
import pdfplumber
from pandas import DataFrame
from keybert import KeyBERT
import matplotlib.pyplot as plt
import numpy as np
# @st.cache(allow_output_mutation=True)
def load_model():
    """Load the KeyBERT keyword-extraction model."""
    return KeyBERT()
def read_(file):
    """Extract the text of an uploaded PDF and return it together with the page count."""
    if file is None:
        return None, 0
    text = []
    with pdfplumber.open(file) as pdf:
        for page in pdf.pages:
            text.append(page.extract_text() or '')
        n_pages = len(pdf.pages)
    text_str = ' '.join(text)
    return text_str, n_pages
st.sidebar.image(
"https://github.com/gizdatalab/policy_tracing/blob/main/img/sdsn.png?raw=true",
use_column_width=True
)
with st.sidebar.container():
    # A distinct key keeps this uploader from clashing with the one in the main panel.
    file = st.file_uploader('Upload PDF File', type=['pdf'], key='sidebar_upload')
st.sidebar.title(
"Options:"
)
st.sidebar.markdown(
"You can freely browse the different chapters - ie example prompts from different people - and see the results."
)
selected_date = st.sidebar.selectbox(
"Please select the chapter you want to read:",
['c1','c2']
)
with st.container():
    st.markdown("<h1 style='text-align: center; color: black;'> SDSN X GIZ - Policy Action Tracking</h1>", unsafe_allow_html=True)
    st.write(' ')
    st.write(' ')

with st.expander("ℹ️ - About this app", expanded=True):
    st.write(
        """
        The *Policy Action Tracker* app is an easy-to-use interface built with Streamlit for analyzing policy documents, developed by GIZ Data and the Sustainable Development Solutions Network (SDSN).
        It uses a minimal keyword extraction technique that leverages multiple NLP embeddings and relies on [Transformers](https://huggingface.co/transformers/) 🤗 to create keywords/keyphrases that are most similar to a document.
        """
    )
    st.markdown("")

st.markdown("")
st.markdown("## 📌 Step One: Upload document")
with st.container():
    file = st.file_uploader('Upload PDF File', type=['pdf'], key='main_upload')
    if file is not None:
        text_str, n_pages = read_(file)
        st.write('Number of pages:', n_pages)
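
# Minimal sketch of how the KeyBERT model from load_model() could be applied to the
# extracted text, using KeyBERT's extract_keywords() API. The ngram range, top_n,
# and column names below are illustrative assumptions, not taken from the app.
if file is not None:
    kw_model = load_model()
    keywords = kw_model.extract_keywords(
        text_str,
        keyphrase_ngram_range=(1, 2),   # single words and two-word phrases
        stop_words='english',
        top_n=10,
    )
    # extract_keywords returns (keyphrase, similarity score) tuples.
    st.write(DataFrame(keywords, columns=['Keyword/Keyphrase', 'Relevancy']))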