# policy_test / app.py
import streamlit as st
st.set_page_config('SDSN x GIZ Policy Tracing', layout="wide")  # must be the first Streamlit call
import seaborn as sns
import pdfplumber
from pandas import DataFrame
from keybert import KeyBERT
import matplotlib.pyplot as plt
import numpy as np
# @st.cache(allow_output_mutation=True)
def load_model():
    """Load the KeyBERT keyword-extraction model."""
    return KeyBERT()
def read_(file):
    """Extract the text of an uploaded PDF and return it together with the page count."""
    if file is None:
        return None, 0
    text = []
    with pdfplumber.open(file) as pdf:
        for page in pdf.pages:
            text.append(page.extract_text() or '')
        n_pages = len(pdf.pages)
    text_str = ' '.join(text)
    return text_str, n_pages
st.sidebar.image(
"https://github.com/gizdatalab/policy_tracing/blob/main/img/sdsn.png?raw=true",
use_column_width=True
)
with st.sidebar.container():
    # A distinct key keeps this uploader from clashing with the one in the main panel.
    file = st.file_uploader('Upload PDF File', type=['pdf'], key='sidebar_upload')
st.sidebar.title(
"Options:"
)
st.sidebar.markdown(
"You can freely browse the different chapters - ie example prompts from different people - and see the results."
)
selected_date = st.sidebar.selectbox(
"Please select the chapter you want to read:",
['c1','c2']
)
with st.container():
    st.markdown("<h1 style='text-align: center; color: black;'> SDSN X GIZ - Policy Action Tracking</h1>", unsafe_allow_html=True)
    st.write(' ')
    st.write(' ')

with st.expander("ℹ️ - About this app", expanded=True):
    st.write(
        """
        The *Policy Action Tracker* app is an easy-to-use interface built with Streamlit for analyzing policy documents, developed by GIZ Data and the Sustainable Development Solutions Network (SDSN).
        It uses a minimal keyword extraction technique that leverages multiple NLP embeddings and relies on [Transformers](https://huggingface.co/transformers/) 🤗 to create keywords/keyphrases that are most similar to a document.
        """
    )
    st.markdown("")

st.markdown("")
st.markdown("## 📌 Step One: Upload document")
with st.container():
    file = st.file_uploader('Upload PDF File', type=['pdf'], key='main_upload')
    if file is not None:
        text_str, n_pages = read_(file)
        st.write('Number of pages:', n_pages)
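
# Minimal sketch of how the KeyBERT model from load_model() could be applied to the
# extracted text, using KeyBERT's extract_keywords() API. The ngram range, top_n,
# and column names below are illustrative assumptions, not taken from the app.
if file is not None:
    kw_model = load_model()
    keywords = kw_model.extract_keywords(
        text_str,
        keyphrase_ngram_range=(1, 2),   # single words and two-word phrases
        stop_words='english',
        top_n=10,
    )
    # extract_keywords returns (keyphrase, similarity score) tuples.
    st.write(DataFrame(keywords, columns=['Keyword/Keyphrase', 'Relevancy']))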