|
import os
import tempfile

import pandas as pd
import streamlit as st
from setfit import SetFitModel

from file_processing import get_paragraphs
|
|
|
|
|
|
|
|
|
# Page header followed by a short explanation of what the app is for.
st.title("Identify references to vulnerable groups.")

# The introduction is built from adjacent string literals so that it renders
# as a single Markdown paragraph: blank lines inside a triple-quoted string
# would be rendered as paragraph breaks in the middle of a sentence.
st.write(
    "Vulnerable groups encompass various communities and individuals who are "
    "disproportionately affected by the impacts of climate change "
    "due to their socioeconomic status, geographical location, or inherent "
    "characteristics. By incorporating the needs and perspectives of these "
    "groups into national climate policies, governments can ensure equitable "
    "outcomes, promote social justice, and strive to build resilience within "
    "the most marginalized populations, fostering a more sustainable and "
    "inclusive society as we navigate the challenges posed by climate change. "
    "This app allows you to identify whether a text contains any references "
    "to vulnerable groups, for example when talking about policy documents."
)

# Accept only PDF, Word and plain-text documents.
# NOTE(review): confirm this list matches what get_paragraphs can parse.
uploaded_file = st.file_uploader(
    "Upload your file here",
    type=["pdf", "docx", "txt"],
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@st.cache_resource
def _load_model():
    """Load the SetFit classifier once and reuse it across script reruns."""
    return SetFitModel.from_pretrained("leavoigt/vulnerable_groups")


# Streamlit re-executes this script on every user interaction; going through
# the cached loader avoids re-instantiating the model on each rerun.
model = _load_model()
|
|
|
|
|
# Names of the vulnerable groups, listed in class-id order (ids start at 0).
_VULNERABLE_GROUP_LABELS = (
    'Agricultural communities',
    'Children and Youth',
    'Coastal communities',
    'Drought-prone regions',
    'Economically disadvantaged communities',
    'Elderly population',
    'Ethnic minorities and indigenous people',
    'Informal sector workers',
    'Migrants and Refugees',
    'Other',
    'People with Disabilities',
    'Rural populations',
    'Sexual minorities (LGBTQI+)',
    'Urban populations',
    'Women',
)

# Lookup table from integer class id to the human-readable group name.
id2label = dict(enumerate(_VULNERABLE_GROUP_LABELS))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _prediction_name(row):
    """Return the label for one model output row, or "NA".

    *row* is one entry of the model output after ``tolist()``. The label of
    the first position equal to 1 is returned; when no position equals 1 the
    result is the string "NA". Only the first hit is reported even if several
    positions are set.
    """
    try:
        hit = row.index(1)
    except ValueError:
        return "NA"
    return id2label[hit]


if uploaded_file is not None:
    # get_paragraphs takes a path, so the upload is first written to disk.
    # The temp file is created with delete=False and closed before it is
    # read: closing guarantees the bytes are flushed, and it lets the path be
    # reopened on platforms that forbid opening a NamedTemporaryFile twice.
    # The original extension is preserved in case get_paragraphs relies on it
    # (its implementation is not visible here).
    suffix = os.path.splitext(uploaded_file.name)[1]
    with tempfile.NamedTemporaryFile(
        mode="wb", suffix=suffix, delete=False
    ) as temp:
        temp.write(uploaded_file.getvalue())
    try:
        par_list = get_paragraphs(temp.name)
    finally:
        # delete=False means cleanup is our responsibility.
        os.remove(temp.name)

    if len(par_list) == 0:
        # Nothing to classify; avoid calling the model on an empty batch.
        st.warning("No paragraphs could be extracted from the uploaded file.")
    else:
        # Classify every paragraph and translate each output row to a label.
        preds_list = model(par_list).tolist()
        predictions_names = [_prediction_name(row) for row in preds_list]

        df_predictions = pd.DataFrame(
            {'Paragraph': par_list, 'Prediction': predictions_names}
        )

        # NOTE(review): every paragraph is shown, including those labelled
        # 'Other' or 'NA'; confirm whether such rows should be hidden.
        st.write(df_predictions)
|
|
|
|