File size: 3,723 Bytes
eee7134
eab471f
f24279f
eab471f
5b4a98a
3f54553
a26f453
 
77a6d9d
 
 
 
 
 
a26f453
5b4a98a
501e1bb
a26f453
3f54553
27a4df7
77a6d9d
27a4df7
7f93a13
5b4a98a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b0709f
 
 
 
57455f3
 
 
 
 
 
 
 
 
 
 
 
89edccb
3b0709f
 
 
57455f3
 
 
 
 
3b0709f
89edccb
 
5b4a98a
89edccb
 
5b4a98a
89edccb
 
5b4a98a
89edccb
5b4a98a
89edccb
 
 
 
 
 
 
 
 
 
 
5b4a98a
89edccb
 
5b4a98a
89edccb
 
5b4a98a
3f54553
89edccb
 
80a32ce
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import os
import tempfile

import pandas as pd
import streamlit as st
from setfit import SetFitModel

from file_processing import get_paragraphs

####################################### Dashboard ######################################################

# App header: title plus a short explanation of what the tool does.
st.title("Identify references to vulnerable groups.")

# Fixed the missing space after "climate change." in the user-facing text.
st.write("""Vulnerable groups encompass various communities and individuals who are disproportionately affected by the impacts of climate change
due to their socioeconomic status, geographical location, or inherent characteristics. By incorporating the needs and perspectives of these groups
into national climate policies, governments can ensure equitable outcomes, promote social justice, and strive to build resilience within the most marginalized populations,
fostering a more sustainable and inclusive society as we navigate the challenges posed by climate change. This app allows you to identify whether a text contains any
references to vulnerable groups, for example when talking about policy documents.""")

# Document upload.
# `uploaded_file` is the handle the prediction section checks, so the
# supported formats are restricted here rather than accepting any file.
uploaded_file = st.file_uploader("Upload your file here", type=['pdf', 'docx', 'txt'])

# Create text input box
#input_text = st.text_area(label='Please enter your text here', value="This policy has been implemented to support women.")

#st.write('Prediction:', model(input_text))

######################################### Model #########################################################

# Load the model
@st.cache_resource
def load_model(model_name: str = "leavoigt/vulnerable_groups") -> SetFitModel:
    """Load the SetFit classifier once and reuse it across script re-runs.

    Streamlit re-executes this script on every widget interaction; caching
    the loaded model as a resource avoids calling ``from_pretrained`` again
    on each re-run.

    Args:
        model_name: Identifier passed to ``SetFitModel.from_pretrained``.

    Returns:
        The loaded ``SetFitModel`` instance.
    """
    return SetFitModel.from_pretrained(model_name)


model = load_model()

# Class names, listed in class-id order: the position of each name in the
# tuple is the integer id it is stored under (0 .. 14).
id2label = dict(enumerate((
    'Agricultural communities',
    'Children and Youth',
    'Coastal communities',
    'Drought-prone regions',
    'Economically disadvantaged communities',
    'Elderly population',
    'Ethnic minorities and indigenous people',
    'Informal sector workers',
    'Migrants and Refugees',
    'Other',
    'People with Disabilities',
    'Rural populations',
    'Sexual minorities (LGBTQI+)',
    'Urban populations',
    'Women',
)))


### Process document to paragraphs 
# Source: https://blog.jcharistech.com/2021/01/21/how-to-save-uploaded-files-to-directory-in-streamlit-apps/

# Store uploaded file temporarily in directory to get file path (necessary for processing)
# def save_uploadedfile(upl_file):
#      with open(os.path.join("tempDir",upl_file.name),"wb") as f:
#          f.write(upl_file.getbuffer())
#      return st.success("Saved File:{} to tempDir".format(upl_file.name))

# if uploaded_file is not None: 
#     # Save the file 
#     file_details = {"FileName": uploaded_file.name, "FileType": uploaded_file.type}
#     save_uploadedfile(uploaded_file)

#     #Get the file path

# File formats handed to get_paragraphs; anything else is rejected up front.
# (A second, unused uploader widget used to be created here; the section now
# works solely with `uploaded_file` from the upload widget defined above.)
SUPPORTED_SUFFIXES = ('.pdf', '.docx', '.txt')


def extract_paragraphs(upload, suffix):
    """Write *upload* to a temporary file and split it into paragraphs.

    The file is written into a temporary directory and processed while that
    directory still exists; the directory is removed afterwards. The file
    extension is kept on the temporary file.
    NOTE(review): assumes get_paragraphs takes a file path and returns an
    iterable of paragraph strings -- confirm against file_processing.

    Args:
        upload: Object returned by ``st.file_uploader`` (provides ``getvalue()``).
        suffix: File extension including the leading dot, e.g. ``".pdf"``.

    Returns:
        A list with the paragraphs returned by ``get_paragraphs``.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        # Use a fixed base name so the user-supplied name never ends up in a path.
        temp_path = os.path.join(temp_dir, "upload" + suffix)
        with open(temp_path, "wb") as temp:
            temp.write(upload.getvalue())
        # Materialise inside the `with` so nothing reads the file after deletion.
        return list(get_paragraphs(temp_path))


def label_name(pred_row):
    """Return the class name for the first position in *pred_row* equal to 1.

    Returns ``"NA"`` when no position is set.
    NOTE(review): assumes each prediction is a multi-hot list with one entry
    per class in id2label -- confirm against the model's output format.
    """
    try:
        index_of_one = pred_row.index(1)
    except ValueError:
        return "NA"
    return id2label[index_of_one]


if uploaded_file is not None:

    suffix = os.path.splitext(uploaded_file.name)[1].lower()

    if suffix not in SUPPORTED_SUFFIXES:
        st.error("Unsupported file type. Please upload a PDF, DOCX or TXT file.")
    else:
        # Process file
        par_list = extract_paragraphs(uploaded_file, suffix)

        if not par_list:
            st.warning("No paragraphs could be extracted from the uploaded file.")
        else:
            ### Make predictions
            preds = model(par_list)

            # Get label names
            preds_list = preds.tolist()
            predictions_names = [label_name(ele) for ele in preds_list]

            # Combine the paragraphs and labels to a dataframe
            df_predictions = pd.DataFrame({'Paragraph': par_list, 'Prediction': predictions_names})

            # Drop all "Other" and "NA" predictions (note the negation: rows
            # whose label is in the list are removed, not kept).
            filtered_df = df_predictions[~df_predictions['Prediction'].isin(['Other', 'NA'])]

            #####################################
            if filtered_df.empty:
                st.info("No references to vulnerable groups were found in this document.")
            else:
                st.write(filtered_df)