Spaces:
Runtime error
Runtime error
File size: 14,395 Bytes
701f8ff cba28b4 701f8ff 5a5f4eb 701f8ff 8e1b917 d8663a9 e735e42 701f8ff 75d0a70 701f8ff 5fb4442 38b6dfd 5fb4442 38b6dfd 621caef 5fb4442 38b6dfd 5fb4442 701f8ff e735e42 5a5f4eb 1ff8db6 5a5f4eb 701f8ff 265c345 701f8ff e735e42 701f8ff 5a5f4eb 701f8ff 63e2daf 701f8ff 5a5f4eb 701f8ff e735e42 75d0a70 701f8ff e735e42 701f8ff b00878a 63e2daf b00878a 30c7c90 e735e42 701f8ff 75d0a70 701f8ff 30c7c90 3e701f9 30c7c90 5a5f4eb 30c7c90 5a5f4eb 701f8ff 3e701f9 701f8ff 63e2daf 701f8ff 63e2daf 701f8ff 63e2daf 701f8ff 63e2daf 1ff8db6 701f8ff 1ff8db6 701f8ff e735e42 63e2daf e735e42 63e2daf 701f8ff 265c345 701f8ff 265c345 701f8ff 30c7c90 701f8ff 63e2daf 701f8ff 63e2daf 265c345 63e2daf 701f8ff 63e2daf 701f8ff e735e42 701f8ff 75d0a70 30c7c90 701f8ff |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 |
import streamlit as st
import pandas as pd
# import gdown
import os
import pickle
from collections import defaultdict, Counter
from streamlit_extras.colored_header import colored_header
from PyDictionary import PyDictionary
dictionary = PyDictionary()
# Wide layout and browser-tab title for the page.
st.set_page_config(layout="wide", page_title="ACl23 Secret Language")

# CSS snippet that is injected with st.markdown(..., unsafe_allow_html=True)
# further down in this file; it removes the border of `.st-bd` elements.
hide_expander_border = """
<style>
.st-bd {border-style: none;}
</style>
"""

# --- Sidebar: search box ----------------------------------------------------
st.sidebar.header("📙 Dictionary")

# The word pre-filled in the search box is resolved in increasing priority:
#   1. the built-in default,
#   2. the first value of a `word` URL query parameter,
#   3. `click_word` from the session state (set outside the visible code).
default_title = 'Asian'
_data = st.experimental_get_query_params()
_query_words = _data.get('word') if _data else None
# An empty value (e.g. `?word=`) is ignored so it cannot become an empty
# search word.
if _query_words and _query_words[0]:
    default_title = _query_words[0]
if 'click_word' in st.session_state:
    default_title = st.session_state.click_word

# Drop every session-state entry except `click_word`. The keys are copied
# into a list first so entries are never deleted from the mapping that is
# being iterated.
# NOTE(review): the original indentation of this file was lost, so it is not
# certain whether this loop ran unconditionally or only when `click_word`
# was present - confirm against the deployed version.
for key in list(st.session_state.keys()):
    if key != 'click_word':
        del st.session_state[key]

title = st.sidebar.text_input(
    ":red[Search secret languages given the following word (case-sensitive)]",
    default_title,
)
# --- Sidebar: FAQ -----------------------------------------------------------
# A heading followed by three entries. Each entry is rendered as a single
# markdown string: the question (in blue), a raw "<br>" tag, then the answer.
# The entries embed raw HTML, hence unsafe_allow_html=True.
st.sidebar.markdown("### Frequent FAQs")

_faq_entries = (
    (
        "1. :blue[*Why are words in sentences represented as subwords instead of complete words?*]<br>",
        "The tokenizer we use is from DistillBERT, ALBERT, or Roberta, which tokenizes sentences into subwords. As a result, the word being replaced in a sentence might be a subword (such as `rain` in `rainforest`).",
    ),
    (
        "2. :blue[*This page is extremely slow. I cannot stand it.*]<br>",
        "We apologize for the slow performance of this page. We are actively working on improving it. "
        "As loading the data can take time and some words have many secret languages, this page needs time to process.",
    ),
    (
        "3. :blue[*Why are some examples significantly different from the original sentences?*] <br>",
        "As per our submission, we replace 1 to 10 subwords in a sentence. However, for some examples with short lengths, the entire sentence may be altered. We are conducting experiments and will present examples where only a single subword has been changed.",
    ),
)
for _faq_question, _faq_answer in _faq_entries:
    st.sidebar.markdown(_faq_question + _faq_answer, unsafe_allow_html=True)
# --- Load the dictionary shard for the searched word ------------------------
# The data is split into several pickle files; the file to open is chosen
# from the code point of the first character of `title`. The loaded object
# (`datas`) is then looked up by word further down in this file.
if not title:
    # An empty search box would make `title[0]` below raise IndexError.
    st.sidebar.warning("Please enter a word to search.")
    st.stop()

_first_code = ord(title[0])
# NOTE(review): each range excludes its upper bound, so '9' (57), 'z' (122)
# and 'Z' (90) are routed to 'other_dict.pkl'. This looks like an off-by-one,
# but it is kept as-is because the shard files may have been written with the
# same rule - confirm against the script that produced them before changing.
if _first_code in range(48, 57):
    file_name = 'num_dict.pkl'
elif _first_code in range(97, 122) or _first_code in range(65, 90):
    file_name = f'{_first_code}_dict.pkl'
else:
    file_name = 'other_dict.pkl'

# NOTE(security): unpickling can execute arbitrary code; these shard files
# must only ever come from a trusted source.
with open(f'all_secret_langauge_by_fist/{file_name}', 'rb') as _shard_file:
    datas = pickle.load(_shard_file)
# --- Rendering of the page for the searched word ----------------------------
# NOTE(review): the original indentation of this file was lost; the nesting
# below was reconstructed from the syntax. The helpers were originally
# defined between the statements of the `if title in datas:` branch and are
# grouped here, ahead of the branch, for readability.

# Characters that are swapped for punctuation placeholders when a secret
# language is used as an in-page anchor id. Applied in this order.
_ANCHOR_SUBSTITUTIONS = (
    ("(", ",,"),
    (")", ".."),
    ("[", ",,,"),
    ("]", "..."),
    ('"', "././"),
)

# Per task: (name of the field that was replaced, name of the context field).
_FIELD_LABELS = {
    'NLI': ('hypothesis', 'Premise'),
    'QA': ('question', 'Text'),
    'Paraphrase': ('sentence 1', 'Sentence 2'),
}


def _anchor_id(secret_language, task):
    """Return the in-page anchor id of *secret_language* for *task*.

    Both the link list (``present_sl_task``) and the link targets
    (``present_dict``) use this function, so that a link and its target
    always agree. Newlines are spelled ``/n``, surrounding whitespace is
    dropped, and brackets/quotes are replaced by placeholders.
    """
    anchor = secret_language.replace("\n", "/n").strip()
    for char, placeholder in _ANCHOR_SUBSTITUTIONS:
        anchor = anchor.replace(char, placeholder)
    return f'{anchor}_{task}'


def _escape_markdown(text):
    """Backslash-escape ``<s>``, ``$`` and ``~`` in *text* before rendering."""
    return text.replace('<s>', r'\<s\>').replace('$', r'\$').replace('~', r'\~')


def _colored(color, value):
    """Return ``color[value]`` with every ``:`` spelled out as ``[colon]``.

    The caller supplies the leading ``:`` that turns the result into
    Streamlit colored text. Colons inside the value are replaced, presumably
    so they cannot interfere with that ``:color[...]`` markup.
    """
    return f'{color}[{value}]'.replace(":", "[colon]")


def present_sl_task(secret_language_by_task, task):
    """Render an expander listing the secret languages found for *task*.

    Args:
        secret_language_by_task: mapping of task name to a list of secret
            language strings.
        task: key of ``secret_language_by_task`` to display.

    Empty strings are skipped. Each remaining entry is shown with newlines
    spelled ``/n`` and surrounding whitespace removed, as a link to the
    anchor rendered by ``present_dict`` for the same task.
    """
    labels = sorted(
        sl.replace("\n", "/n").strip()
        for sl in secret_language_by_task[task]
        if sl != ''
    )
    header = (
        f'***{len(labels)}*** secret languages of ***{title}*** '
        f'on {task.replace("paraphrase", "Paraphrase")}'
    )
    with st.expander(header):
        links = [
            f'<a href="#{_anchor_id(label, task)}">{label}</a>'
            for label in labels
        ]
        st.markdown(_escape_markdown(', '.join(links)), unsafe_allow_html=True)


def present_dataframe(dataframe, key, title):
    """Render one expander per row of *dataframe* whose task equals *key*.

    Not called anywhere in the visible part of this file; kept so that the
    name stays available.

    Args:
        dataframe: pandas DataFrame with the columns ``tasks``,
            ``secret languages``, ``original sentences``,
            ``replaced sentences`` and ``premise / sentence 2 / text``.
        key: task name to filter on.
        title: the searched word, used in the "nothing found" message.
    """
    new_dataframe = dataframe.loc[dataframe['tasks'] == key].reset_index()
    new_dataframe['replaced sentences'] = new_dataframe['replaced sentences'].str.replace('<s>', '[POS]')
    if not len(new_dataframe):
        st.error(f'We did not find any Secret Language of {title} on {key}.')
        return

    new_dataframe = new_dataframe.drop(columns=['tasks', 'index'])
    first_label, second_label = _FIELD_LABELS.get(key, (None, None))
    # For a task without known labels, no label text is emitted at all.
    first_prefix = f'{first_label}: :' if first_label else ''
    second_prefix = f'<br>{second_label}: :' if second_label else ''

    for i in range(len(new_dataframe)):
        _title = f'{i + 1}\\. **[{new_dataframe.loc[i, "secret languages"]}]**'
        with st.expander(_title):
            _string = ''.join((
                'Original ', first_prefix,
                _colored('blue', new_dataframe.loc[i, "original sentences"]),
                '<br>Replaced ', first_prefix,
                _colored('red', new_dataframe.loc[i, "replaced sentences"]),
                second_prefix,
                _colored('blue', new_dataframe.loc[i, "premise / sentence 2 / text"]),
            ))
            st.markdown(_string, unsafe_allow_html=True)
    st.markdown(hide_expander_border, unsafe_allow_html=True)


def _group_examples(_dict, task):
    """Group the examples of *task* in *_dict* by secret language.

    Every row whose task matches creates an entry for its secret language,
    but the row's sentences are only recorded when ``output_ori`` equals
    ``output_rep`` for that row. An entry may therefore have no examples.

    Returns:
        dict mapping secret language -> dict with the parallel lists
        ``original``, ``replaced``, ``context`` and ``output``.
    """
    grouped = {}
    rows = zip(_dict['secret languages'], _dict['tasks'])
    for i, (secret_language, row_task) in enumerate(rows):
        if row_task != task:
            continue
        entry = grouped.setdefault(
            secret_language,
            {'original': [], 'replaced': [], 'context': [], 'output': []},
        )
        if _dict['output_ori'][i] != _dict['output_rep'][i]:
            continue
        replaced = _dict['replaced sentences'][i]
        # Strip the leading special token: '<s>' for QA, the first '[CLS]'
        # for the other tasks.
        if task == 'QA':
            replaced = replaced.replace('<s>', '')
        else:
            replaced = replaced.replace('[CLS]', '', 1)
        entry['original'].append(_dict['original sentences'][i])
        entry['replaced'].append(replaced)
        entry['context'].append(_dict['premise / sentence 2 / text'][i])
        entry['output'].append(_dict['output_ori'][i])
    return grouped


def present_dict(_dict, task):
    """Render every secret language of *task* together with its examples.

    Args:
        _dict: per-word record, a dict of parallel lists with the keys
            ``secret languages``, ``tasks``, ``original sentences``,
            ``replaced sentences``, ``premise / sentence 2 / text``,
            ``output_ori`` and ``output_rep``.
        task: task name to display.

    Each secret language becomes a numbered heading carrying the anchor id
    that ``present_sl_task`` links to, followed by an ordered list of its
    examples. An error box is shown when the task has no secret language.
    """
    grouped = _group_examples(_dict, task)
    if not grouped:
        st.error(f'We did not find any Secret Language of {title} on {task}.', icon="⚠️")
        return

    first_label, second_label = _FIELD_LABELS.get(task, (None, None))
    # For a task without known labels, no label text is emitted at all.
    first_prefix = f'{first_label}**: :' if first_label else ''
    second_prefix = f'<br> **{second_label}**: :' if second_label else ''

    for number, secret_language in enumerate(sorted(grouped), start=1):
        entry = grouped[secret_language]
        shown = secret_language.replace("\n", "/n")
        parts = [
            f'{number}. <span id="{_anchor_id(secret_language, task)}"> **:red[{shown}]**</span>',
            '<br>Examples:<br>',
            '<blockquote><ol>',
        ]
        examples = zip(entry['original'], entry['replaced'], entry['context'], entry['output'])
        for original, replaced, context, output in examples:
            highlighted = f'{replaced}'.replace('/', '\\')
            # Highlight the secret language inside the sentence only, never
            # inside the surrounding `red[...]` markup. An empty secret
            # language is skipped: str.replace('', x) would insert x between
            # every character.
            if secret_language:
                highlighted = highlighted.replace(secret_language, f"<i><b>{shown}</b></i>")
            parts.extend((
                '<li> **Original ', first_prefix, _colored('blue', original),
                '<br> **Replaced ', first_prefix, _colored('red', highlighted),
                second_prefix, _colored('blue', context),
                "<br>**Model's prediction:** :", _colored('blue', output),
                '<br></li>',
            ))
        parts.append('</ol></blockquote>')
        st.markdown(_escape_markdown(''.join(parts)), unsafe_allow_html=True)
    st.markdown(hide_expander_border, unsafe_allow_html=True)


if title in datas:
    st.title(title)

    # --- English meaning of the searched word -------------------------------
    colored_header(
        label=f"{title}'s meaning in English[¹](#jump)",
        description="**English meaning is supported by [PyDictionary](https://pypi.org/project/PyDictionary/)*",
        color_name="violet-70",
    )
    # Best effort: any failure of the lookup or of the formatting, as well as
    # an empty result, is reported as "meaning not found" rather than
    # aborting the page.
    try:
        title_mean = dictionary.meaning(title)
        if title_mean:
            _string = '>' + ''.join(
                f':violet[{key}]: {"; ".join(meanings)}<br>'
                for key, meanings in title_mean.items()
            )
        else:
            _string = None
    except Exception:
        _string = None
    if _string:
        st.markdown(_string, unsafe_allow_html=True)
    else:
        st.error(f'We cannot find the meaning of {title} in English (PyDictionary), which might be due to the bug.',
                 icon="🚨")

    # --- Secret languages of the searched word, per task --------------------
    data_title = datas[title]
    title_secret_languages = sorted(set(data_title["secret languages"]))
    colored_header(
        label=f"{title}'s secret languages",
        description=f'Overall, we found :red[{len(title_secret_languages)}] secret languages of :blue[{title}].',
        color_name="red-70",
    )

    secret_language_by_task = {
        'QA': [],
        'NLI': [],
        'Paraphrase': [],
    }
    for _sl, _task in zip(data_title['secret languages'], data_title['tasks']):
        # setdefault: a task name outside the three above gets its own list
        # instead of raising KeyError.
        secret_language_by_task.setdefault(_task, []).append(_sl)
    for k in secret_language_by_task:
        secret_language_by_task[k] = list(set(secret_language_by_task[k]))

    present_sl_task(secret_language_by_task, 'NLI')
    present_sl_task(secret_language_by_task, 'QA')
    present_sl_task(secret_language_by_task, 'Paraphrase')
    st.caption("\\**Hyperlinks only function when the corresponding tab is open. "
               "For example, the hyperlinks in the paraphrase section will only work when the paraphrase tab is open. However, due to the container property of Hugging Face Space, the hyperlinks might be not able to function.*")
    st.caption('\\**Due to the grammatical properties of HTML, the layout of this page may vary.*')

    # --- Examples, one tab per task -----------------------------------------
    colored_header(
        label="Examples of replaced sentences",
        description='**The number following the tasks represents the number of examples found for a particular task, which may be different from the number of secret languages.*',
        color_name="orange-70",
    )
    _num = Counter(data_title['tasks'])
    tab1, tab2, tab3 = st.tabs([f'NLI ({_num["NLI"]})', f'QA ({_num["QA"]})', f'Paraphrase ({_num["Paraphrase"]})'])
    with tab1:
        present_dict(data_title, 'NLI')
    with tab2:
        present_dict(data_title, 'QA')
    with tab3:
        present_dict(data_title, 'Paraphrase')
else:
    st.error(f'{title} is not in the dictionary of Secret Language.', icon="⚠️")
|