peter2000 committed on
Commit
3e0a87b
•
1 Parent(s): c5118ce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -116
app.py CHANGED
@@ -10,6 +10,22 @@ import numpy as np
10
  import streamlit as st
11
 
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  with st.container():
14
  st.markdown("<h1 style='text-align: center; color: black;'> Policy Action Tracking</h1>", unsafe_allow_html=True)
15
  st.write(' ')
@@ -25,122 +41,7 @@ with st.expander("ℹ️ - About this app", expanded=True):
25
  """
26
  )
27
 
28
- st.markdown("")
29
 
30
  st.markdown("")
31
  st.markdown("## 📌 Step One: Upload document ")
32
-
33
- with st.container():
34
-
35
- file = st.file_uploader('Upload PDF File', type=['pdf'])
36
-
37
- if file is not None:
38
- text = []
39
- with pdfplumber.open(file) as pdf:
40
- for page in pdf.pages:
41
- text.append(page.extract_text())
42
- text_str = ' '.join([page for page in text])
43
-
44
- st.write('Number of pages:',len(pdf.pages))
45
-
46
- @st.cache(allow_output_mutation=True)
47
- def load_model():
48
- return KeyBERT()
49
-
50
- kw_model = load_model()
51
-
52
- keywords = kw_model.extract_keywords(
53
- text_str,
54
- keyphrase_ngram_range=(1, 2),
55
- use_mmr=True,
56
- stop_words="english",
57
- top_n=15,
58
- diversity=0.7,
59
- )
60
-
61
- st.markdown("## 🎈 What is my document about?")
62
-
63
- df = (
64
- DataFrame(keywords, columns=["Keyword/Keyphrase", "Relevancy"])
65
- .sort_values(by="Relevancy", ascending=False)
66
- .reset_index(drop=True)
67
- )
68
-
69
- df.index += 1
70
-
71
- # Add styling
72
- cmGreen = sns.light_palette("green", as_cmap=True)
73
- cmRed = sns.light_palette("red", as_cmap=True)
74
- df = df.style.background_gradient(
75
- cmap=cmGreen,
76
- subset=[
77
- "Relevancy",
78
- ],
79
- )
80
- c1, c2, c3 = st.columns([1, 3, 1])
81
-
82
- format_dictionary = {
83
- "Relevancy": "{:.1%}",
84
- }
85
-
86
- df = df.format(format_dictionary)
87
-
88
- with c2:
89
- st.table(df)
90
-
91
- ######## SDG!
92
- from transformers import pipeline
93
-
94
- finetuned_checkpoint = "peter2000/roberta-base-finetuned-osdg"
95
- classifier = pipeline("text-classification", model=finetuned_checkpoint)
96
-
97
- word_list = text_str.split()
98
- len_word_list = len(word_list)
99
- par_list = []
100
- par_len = 130
101
- for i in range(0,len_word_list // par_len):
102
- string_part = ' '.join(word_list[i*par_len:(i+1)*par_len])
103
- par_list.append(string_part)
104
-
105
- labels = classifier(par_list)
106
- labels_= [(l['label'],l['score']) for l in labels]
107
- df = DataFrame(labels_, columns=["SDG", "Relevancy"])
108
- df['text'] = par_list
109
- df = df.sort_values(by="Relevancy", ascending=False).reset_index(drop=True)
110
- df.index += 1
111
- #df =df[df['Relevancy']>.95]
112
- x = df['SDG'].value_counts()
113
-
114
- plt.rcParams['font.size'] = 25
115
- colors = plt.get_cmap('Blues')(np.linspace(0.2, 0.7, len(x)))
116
- # plot
117
- fig, ax = plt.subplots()
118
- ax.pie(x, colors=colors, radius=2, center=(4, 4),
119
- wedgeprops={"linewidth": 1, "edgecolor": "white"}, frame=False,labels =list(x.index))
120
-
121
- st.markdown("## 🎈 Anything related to SDGs?")
122
-
123
- c4, c5, c6 = st.columns([5, 7, 1])
124
-
125
- # Add styling
126
- cmGreen = sns.light_palette("green", as_cmap=True)
127
- cmRed = sns.light_palette("red", as_cmap=True)
128
- df = df.style.background_gradient(
129
- cmap=cmGreen,
130
- subset=[
131
- "Relevancy",
132
- ],
133
- )
134
-
135
- format_dictionary = {
136
- "Relevancy": "{:.1%}",
137
- }
138
-
139
- df = df.format(format_dictionary)
140
-
141
- with c4:
142
- st.pyplot(fig)
143
- with c5:
144
- st.table(df)
145
-
146
-
 
10
  import streamlit as st
11
 
12
 
13
+
14
+ ##@st.cache(allow_output_mutation=True)
15
+ def load_model():
16
+ return KeyBERT()
17
+
18
+ kw_model = load_model()
19
+
20
+ keywords = kw_model.extract_keywords(
21
+ text_str,
22
+ keyphrase_ngram_range=(1, 2),
23
+ use_mmr=True,
24
+ stop_words="english",
25
+ top_n=15,
26
+ diversity=0.7,
27
+ )
28
+
29
  with st.container():
30
  st.markdown("<h1 style='text-align: center; color: black;'> Policy Action Tracking</h1>", unsafe_allow_html=True)
31
  st.write(' ')
 
41
  """
42
  )
43
 
44
+ st.markdown("")
45
 
46
  st.markdown("")
47
  st.markdown("## 📌 Step One: Upload document ")