anonymousauthors committed on
Commit
0066f01
1 Parent(s): 63e2daf

Update SecretLanguage.py

Browse files
Files changed (1) hide show
  1. SecretLanguage.py +19 -230
SecretLanguage.py CHANGED
@@ -1,245 +1,34 @@
1
  import streamlit as st
2
  import pandas as pd
 
3
  # import gdown
4
  import os
5
- import pickle
6
- from collections import defaultdict, Counter
7
 
8
- from PyDictionary import PyDictionary
9
-
10
- dictionary = PyDictionary()
11
 
12
  st.set_page_config(layout="wide", page_title="ACl23 Secret Language")
 
 
13
 
14
- hide_expander_border = """
15
- <style>
16
- .st-bd {border-style: none;}
17
- </style>
18
- """
19
-
20
- # st.title("ACl23 Secret Language")
21
-
22
- # sidebar
23
- st.sidebar.header("📙 Dictionary")
24
- _data = st.experimental_get_query_params()
25
- default_title = 'Asian'
26
- if _data:
27
- if 'word' in _data.keys():
28
- default_title = _data['word'][0]
29
- if 'click_word' in st.session_state:
30
- default_title = st.session_state.click_word
31
-
32
- title = st.sidebar.text_input(":red[Search secret languages given the following word (case-sensitive)]", default_title)
33
-
34
- if ord(title[0]) in list(range(48, 57)):
35
- file_name = 'num_dict.pkl'
36
- elif ord(title[0]) in list(range(97, 122)) + list(range(65, 90)):
37
- file_name = f'{ord(title[0])}_dict.pkl'
38
- else:
39
- file_name = 'other_dict.pkl'
40
-
41
- datas = pickle.load(open(f'all_secret_langauge_by_fist/{file_name}', 'rb'))
42
- if title in datas:
43
- st.title(title)
44
- # st.markdown(f":red[[]]")
45
- st.markdown(f"## {title}'s meaning in English[¹](#jump)")
46
-
47
- # write the meaning of input word
48
- try:
49
- title_mean = dictionary.meaning(title)
50
- _string = '>'
51
- for key in title_mean:
52
- _string += f':violet[{key}]: {";".join(title_mean[key])}<br>'
53
- st.markdown(_string, unsafe_allow_html=True)
54
- except:
55
- st.error(f'We cannot find the meaning of {title} in English (PyDictionary), which might be due to the bug.',
56
- icon="🚨")
57
-
58
- st.markdown(f"---")
59
- st.markdown(f"## {title}'s secret languages")
60
- data_title = datas[title]
61
- # st.markdown(data_title.keys())
62
- title_secret_languages = list(sorted(list(set(data_title["secret languages"]))))
63
- # dataframe = pd.DataFrame(datas[title])
64
- # st.markdown(f'### We found {len(set(dataframe.loc[:, "secret languages"]))} secret languages of {title}.', unsafe_allow_html=True)
65
- st.markdown(f'Overall, we found :red[{len(title_secret_languages)}] secret languages of :blue[{title}].',
66
- unsafe_allow_html=True)
67
- special = '"'
68
- # _title_secret_languages = [f'[{i}](#{i.strip().replace("(", ",,").replace(")", "..").replace("[", ",,,").replace("]", "...").replace(special, "././")})'
69
- # for i in title_secret_languages]
70
- # st.markdown('>' + ', '.join(_title_secret_languages).replace('<s>', '\<s\>').replace('$', '\$').replace('~', '\~'),
71
- # unsafe_allow_html=True)
72
-
73
- secret_language_by_task = {
74
- 'QA': [],
75
- 'NLI': [],
76
- 'paraphrase': [],
77
- }
78
- for i in range(len(data_title['secret languages'])):
79
- secret_language_by_task[data_title['tasks'][i]].append(data_title['secret languages'][i])
80
- for k in secret_language_by_task:
81
- secret_language_by_task[k] = list(set(secret_language_by_task[k]))
82
-
83
-
84
- def present_sl_task(secret_language_by_task, task):
85
- all_sl = sorted(secret_language_by_task[task])
86
- for i in range(len(all_sl)):
87
- all_sl[i] = all_sl[i].replace("\n", "/n")
88
- st.markdown(
89
- f':red[{len(all_sl)}] secret languages of :blue[{title}] on {task.replace("paraphrase", "Paraphrase")}',
90
- unsafe_allow_html=True)
91
- special = '"'
92
- _title_secret_languages = [
93
- # f'[{i}](#{i.strip().replace("(", ",,").replace(")", "..").replace("[", ",,,").replace("]", "...").replace(special, "././")}_{task})'
94
- f'<a href="#{i.strip().replace("(", ",,").replace(")", "..").replace("[", ",,,").replace("]", "...").replace(special, "././")}_{task}">{i}</a>'
95
- for i in all_sl]
96
- st.markdown(
97
- '>' + ', '.join(_title_secret_languages).replace('<s>', '\<s\>').replace('$', '\$').replace('~', '\~'),
98
- unsafe_allow_html=True)
99
-
100
-
101
- present_sl_task(secret_language_by_task, 'NLI')
102
- present_sl_task(secret_language_by_task, 'QA')
103
- present_sl_task(secret_language_by_task, 'paraphrase')
104
-
105
- st.markdown(f"*Hyperlinks only function when the corresponding tab is open. "
106
- f"For example, the hyperlinks in the paraphrase section will only work when the paraphrase tab is open. However, due to the container property of Huggingface Space, the hyperlinks might be not able to function.*")
107
- st.markdown(f"---")
108
- st.markdown(f"## Examples of replaced sentences")
109
 
110
- # st.text(','.join(title_secret_languages).replace('<s>', '\<s\>'))
111
- # st.dataframe(dataframe)
112
- _num = Counter(data_title['tasks'])
113
- tab1, tab2, tab3 = st.tabs([f'NLI ({_num["NLI"]})', f'QA ({_num["QA"]})', f'Paraphrase ({_num["paraphrase"]})'])
114
 
 
115
 
116
- def present_dataframe(dataframe, key, title):
117
- new_dataframe = dataframe.loc[dataframe['tasks'] == key].reset_index()
118
- new_dataframe['replaced sentences'] = new_dataframe['replaced sentences'].str.replace('<s>', '[POS]')
119
- if len(new_dataframe):
120
- new_dataframe = new_dataframe.drop(columns=['tasks', 'index'])
121
- # st.markdown(new_dataframe.columns)
122
- for i in range(len(new_dataframe)):
123
- _title = f'{i + 1}\. **[{new_dataframe.loc[i, "secret languages"]}]**'
124
- with st.expander(_title):
125
- # _string = f'{i + 1}. :red[{new_dataframe.loc[i, "secret languages"]}]'
126
- _string = 'Original '
127
- if key == 'NLI':
128
- _string += 'hypothesis: :'
129
- elif key == 'QA':
130
- _string += 'question: :'
131
- elif key == 'Paraphrase':
132
- _string += 'sentence 1: :'
133
- _string += f'blue[{new_dataframe.loc[i, "original sentences"]}]'.replace(":", "[colon]")
134
- _string += '<br>Replaced '
135
- if key == 'NLI':
136
- _string += 'hypothesis: :'
137
- elif key == 'QA':
138
- _string += 'question: :'
139
- elif key == 'Paraphrase':
140
- _string += 'sentence 1: :'
141
- _string += f'red[{new_dataframe.loc[i, "replaced sentences"]}]'.replace(":", "[colon]")
142
- if key == 'NLI':
143
- _string += '<br>Premise: :'
144
- elif key == 'QA':
145
- _string += '<br>Text: :'
146
- elif key == 'Paraphrase':
147
- _string += '<br>Sentence 2: :'
148
- _string += f'blue[{new_dataframe.loc[i, "premise / sentence 2 / text"]}]'.replace(":", "[colon]")
149
- st.markdown(_string, unsafe_allow_html=True)
150
- # st.text(f'Examples: :blue[{new_dataframe.loc[i, "replaced sentences".replace(":", "[colon]")]}]')
151
- # st.dataframe(new_dataframe)
152
- st.markdown(hide_expander_border, unsafe_allow_html=True)
153
- else:
154
- st.error(f'We did not find any Secret Language of {title} on {key}.')
155
 
 
 
156
 
157
- def present_dict(_dict, task):
158
- # st.text(set(_dict['tasks']))
159
- _all = defaultdict(int)
160
- for i in range(len(_dict['secret languages'])):
161
- if _dict['tasks'][i] == task:
162
- _sl = _dict['secret languages'][i]
163
- if type(_all[_sl]) == int:
164
- _all[_sl] = {
165
- 'Original hypothesis': [],
166
- 'Replaced hypothesis': [],
167
- 'Premise': [],
168
- 'output': []
169
- }
170
- _all[_sl]['Original hypothesis'].append(_dict['original sentences'][i])
171
- if task == 'QA':
172
- _all[_sl]['Replaced hypothesis'].append(_dict['replaced sentences'][i].replace('<s>', ''))
173
- else:
174
- _all[_sl]['Replaced hypothesis'].append(_dict['replaced sentences'][i])
175
- _all[_sl]['Premise'].append(_dict['premise / sentence 2 / text'][i])
176
- _all[_sl]['output'].append(_dict['output'][i])
177
- if len(_all.keys()):
178
- all_keys = sorted(list(_all.keys()))
179
- for i in range(len(all_keys)):
180
- _sl = all_keys[i]
181
- _sl_in_span = _sl.strip().replace("(", ",,").replace(")", "..").replace("[", ",,,").replace("]",
182
- "...").replace(
183
- special, "././").replace('\n', '/n')
184
- # if _sl == '[]':
185
- # st.text(_sl)
186
- ___sl = _sl.replace("\n", "/n")
187
- _title = f'{i + 1}. <span id="{_sl_in_span}_{task}"> **:red[{___sl}]**</span>'
188
- # with st.expander(_title, expanded=True):
189
- _string = _title + '<br>Examples:<br>'
190
- # st.markdown(_title, unsafe_allow_html=True)
191
- # st.markdown(f'Examples:', unsafe_allow_html=True)
192
- _string += '<blockquote><ol>'
193
- for j in range(len(_all[_sl]['Original hypothesis'])):
194
- # _string += f'{j+1}. Original '
195
- _string += f'<li> **Original '
196
- if task == 'NLI':
197
- _string += 'hypothesis**: :'
198
- elif task == 'QA':
199
- _string += 'question**: :'
200
- elif task == 'paraphrase':
201
- _string += 'sentence 1**: :'
202
- _string += f'blue[{_all[_sl]["Original hypothesis"][j]}]'.replace(":", "[colon]")
203
- _string += '<br> **Replaced '
204
- if task == 'NLI':
205
- _string += 'hypothesis**: :'
206
- elif task == 'QA':
207
- _string += 'question**: :'
208
- elif task == 'paraphrase':
209
- _string += 'sentence 1**: :'
210
- _string += f'red[{_all[_sl]["Replaced hypothesis"][j]}]'.replace(":", "[colon]")
211
- if task == 'NLI':
212
- _string += '<br> **Premise**: :'
213
- elif task == 'QA':
214
- _string += '<br> **Text**: :'
215
- elif task == 'paraphrase':
216
- _string += '<br> **Sentence 2**: :'
217
- _string += f'blue[{_all[_sl]["Premise"][j]}]'.replace(":", "[colon]")
218
- _string += "<br>**Model's prediction:** :" + f'blue[{_all[_sl]["output"][j]}]'.replace(":", "[colon]")
219
- _string += '<br></li>'
220
- _string += '</ol></blockquote>'
221
- st.markdown(_string.replace('<s>', '\<s\>').replace('$', '\$').replace('~', '\~'),
222
- unsafe_allow_html=True)
223
- # st.text(f'Examples: :blue[{new_dataframe.loc[i, "replaced sentences".replace(":", "[colon]")]}]')
224
- # st.dataframe(new_dataframe)
225
- st.markdown(hide_expander_border, unsafe_allow_html=True)
226
- else:
227
- st.error(f'We did not find any Secret Language of {title} on {task}.', icon="⚠️")
228
 
 
229
 
230
- with tab1:
231
- # st.header("NLI")
232
- # present(dataframe, 'NLI', title)
233
- present_dict(data_title, 'NLI')
234
- with tab2:
235
- # st.header("QA")
236
- # present(dataframe, 'QA', title)
237
- present_dict(data_title, 'QA')
238
- with tab3:
239
- # present(dataframe, 'Paraphrase', title)
240
- present_dict(data_title, 'paraphrase')
241
- st.markdown(
242
- f'<span id="jump">¹</span>*Enlish meaning is supported by [PyDictionary](https://pypi.org/project/PyDictionary/).*',
243
- unsafe_allow_html=True)
244
- else:
245
- st.error(f'{title} is not in the dictionary of Secret Language.', icon="⚠️")
 
1
  import streamlit as st
2
  import pandas as pd
3
+ # from txtai.embeddings import Embeddings
4
  # import gdown
5
  import os
6
+ from PIL import Image
 
7
 
8
+ search_image = Image.open('images/Search.png')
9
+ browse_image = Image.open('images/Browse.png')
 
10
 
11
  st.set_page_config(layout="wide", page_title="ACl23 Secret Language")
12
+ # with open('style.css') as f:
13
+ # st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
+ st.title("ACl23 Submission: Finding Secret Language")
 
 
 
17
 
18
+ st.markdown('This webpage serves as an illustration of an anonymous submission to ACL 23.')
19
 
20
+ st.markdown('### How to play with this page?')
21
+ st.markdown('We present two methods for searching secret language: a direct search using the Dictionary (Search) option, and browsing words that have already been found for secret languages.')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
+ st.markdown("By entering a word you want to find its secret languages, you can view the word's meaning in English, all the secret languages we have discovered for it, and examples.")
24
+ st.image(search_image, caption='A search example.')
25
 
26
+ st.markdown('By clicking on the initial letters (A to Z, numbers, and other characters), you can view all the words whose secret languages have been discovered and that begin with the selected initial. By clicking on a word, you will be redirected to the search page, where you can view information about the selected word.')
27
+ st.image(browse_image, caption='A browse example.')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
+ st.markdown('### Ethics statements for this webpage')
30
 
31
+ st.markdown('We present secret languages discovered using our proposed algorithms. '
32
+ 'Our experiments utilize publicly available NLP and cross-modal datasets and leverage pre-trained language models. '
33
+ 'We do not believe that our code or methods raise concerns regarding discrimination, bias, fairness, inappropriate potential applications, impact, privacy and security issues, legal compliance, research integrity, or research practice. '
34
+ 'However, the datasets and models used may contain biases that may be inherited by the models we use.')