SecretLanguage / pages / 0_📙_Dictionary_(Search).py
AnonymousAuthors's picture
Update pages/0_📙_Dictionary_(Search).py
e735e42
raw
history blame
11 kB
import streamlit as st
import pandas as pd
# import gdown
import os
import pickle
from collections import defaultdict, Counter
from PyDictionary import PyDictionary
# Client used further down to look up the plain English meaning of the
# searched word.
dictionary = PyDictionary()

st.set_page_config(layout="wide", page_title="ACl23 Secret Language")

# CSS injected through st.markdown(..., unsafe_allow_html=True). Judging by its
# name it is meant to hide the border Streamlit draws around expanders.
hide_expander_border = """
<style>
.st-bd {border-style: none;}
</style>
"""

# sidebar
st.sidebar.header("📙 Dictionary")

# Choose the initial search word. Precedence, lowest to highest: the built-in
# default, the `word` query parameter, the value stored in
# st.session_state.click_word.
_data = st.experimental_get_query_params()
default_title = 'Asian'
if _data and 'word' in _data:
    default_title = _data['word'][0]
if 'click_word' in st.session_state:
    default_title = st.session_state.click_word
title = st.sidebar.text_input(
    ":red[Search secret languages given the following word (case-sensitive)]",
    default_title,
)

# The dictionary is split into pickle shards selected by the first character
# of the word. An empty search box has no first character; map it to the
# catch-all shard so the page reports "not in the dictionary" instead of
# raising IndexError.
# NOTE(review): the ranges below exclude '9' (57), 'z' (122) and 'Z' (90), so
# such words are looked up in other_dict.pkl. This is kept unchanged because
# the shard files were presumably generated with the same ranges -- confirm
# against the script that built them before "fixing" the bounds.
_first_code = ord(title[0]) if title else -1
if _first_code in range(48, 57):
    file_name = 'num_dict.pkl'
elif _first_code in range(97, 122) or _first_code in range(65, 90):
    file_name = f'{_first_code}_dict.pkl'
else:
    file_name = 'other_dict.pkl'

# NOTE: pickle executes arbitrary code on load; this is only acceptable as
# long as the shard directory contains trusted files shipped with the app.
# The context manager closes the file handle, which the bare open() leaked.
with open(f'all_secret_langauge_by_fist/{file_name}', 'rb') as _shard:
    datas = pickle.load(_shard)
if title in datas:
    st.title(title)
    st.markdown(f"## {title}'s meaning in English[¹](#jump)")

    # Show the English meaning of the searched word. dictionary.meaning() is
    # an external lookup, so this stays best-effort: any ordinary failure, or
    # an empty / None result, ends in the error banner below. Catching
    # Exception (not a bare except) keeps BaseException-derived signals such
    # as KeyboardInterrupt from being swallowed.
    _meaning_markup = None
    try:
        title_mean = dictionary.meaning(title)
        if title_mean:
            # One "<part of speech>: def1;def2" line per entry, rendered as a
            # Markdown blockquote.
            _meaning_markup = '>' + ''.join(
                f':violet[{key}]: {";".join(title_mean[key])}<br>'
                for key in title_mean
            )
    except Exception:
        _meaning_markup = None
    if _meaning_markup is not None:
        st.markdown(_meaning_markup, unsafe_allow_html=True)
    else:
        st.error(f'We cannot find the meaning of {title} in English (PyDictionary), which might be due to the bug.',
                 icon="🚨")

    st.markdown("---")
    st.markdown(f"## {title}'s secret languages")
    data_title = datas[title]
    title_secret_languages = sorted(set(data_title["secret languages"]))
    st.markdown(f'Overall, we found :red[{len(title_secret_languages)}] secret languages of :blue[{title}].',
                unsafe_allow_html=True)

    # Double-quote character that gets escaped when anchor ids are built;
    # present_dict reads this module-level name.
    special = '"'

    # Group the secret languages by the task they were found on, then drop
    # duplicates within each task. A task label other than these three raises
    # KeyError, as before.
    secret_language_by_task = {
        'QA': [],
        'NLI': [],
        'paraphrase': [],
    }
    for _task, _sl in zip(data_title['tasks'], data_title['secret languages']):
        secret_language_by_task[_task].append(_sl)
    for k in secret_language_by_task:
        secret_language_by_task[k] = list(set(secret_language_by_task[k]))
def present_sl_task(secret_language_by_task, task):
    """Render the secret languages found on one task as a list of anchor links.

    Writes two Markdown elements: a count line, and a blockquote holding one
    link per secret language (sorted). Each link targets
    ``#<escaped secret language>_<task>``, the same id scheme that
    ``present_dict`` puts on its ``<span>`` elements.

    Args:
        secret_language_by_task: Mapping from task label to a collection of
            secret-language strings.
        task: Task label to present; must be a key of the mapping.

    Reads the module-level ``title`` (the searched word) for the count line.
    """
    all_sl = sorted(secret_language_by_task[task])
    task_label = task.replace("paraphrase", "Paraphrase")
    st.markdown(
        f':red[{len(all_sl)}] secret languages of :blue[{title}] on {task_label}',
        unsafe_allow_html=True)

    # Brackets, parentheses and double quotes are rewritten into punctuation
    # runs to form the anchor id. The replacement texts contain none of the
    # replaced characters, so one translate() pass equals the former chain of
    # str.replace() calls.
    anchor_escapes = str.maketrans({
        '(': ',,',
        ')': '..',
        '[': ',,,',
        ']': '...',
        '"': '././',
    })
    links = [
        f'[{sl}](#{sl.strip().translate(anchor_escapes)}_{task})'
        for sl in all_sl
    ]

    # Backslash-escape `<s>`, `$` and `~`, presumably so Markdown shows them
    # literally. The backslashes are written as '\\' because '\<', '\$' and
    # '\~' are invalid escape sequences that newer Python versions warn about;
    # the resulting runtime strings are unchanged.
    listing = ', '.join(links)
    listing = listing.replace('<s>', '\\<s\\>').replace('$', '\\$').replace('~', '\\~')
    st.markdown('>' + listing, unsafe_allow_html=True)
# Per-task overview of the secret languages, in the same order as the tabs
# created at the end of this section.
for _overview_task in ('NLI', 'QA', 'paraphrase'):
    present_sl_task(secret_language_by_task, _overview_task)

_hyperlink_note = (
    "*Hyperlinks only function when the corresponding tab is open. "
    "For example, the hyperlinks in the paraphrase section will only work when the paraphrase tab is open.*"
)
st.markdown(_hyperlink_note)
st.markdown("---")
st.markdown("## Examples of replaced sentences")

# Number of example rows per task; shown in the tab captions.
_num = Counter(data_title['tasks'])
_tab_captions = [
    f'NLI ({_num["NLI"]})',
    f'QA ({_num["QA"]})',
    f'Paraphrase ({_num["paraphrase"]})',
]
tab1, tab2, tab3 = st.tabs(_tab_captions)
def present_dataframe(dataframe, key, title):
    """Render the examples of one task from a DataFrame, one expander per row.

    Args:
        dataframe: DataFrame with the columns ``tasks``, ``secret languages``,
            ``original sentences``, ``replaced sentences`` and
            ``premise / sentence 2 / text``.
        key: Task label used to filter the ``tasks`` column. ``'NLI'``,
            ``'QA'`` and both ``'paraphrase'`` and ``'Paraphrase'`` get
            task-specific field labels.
        title: The searched word; only used in the "nothing found" message.

    Shows an error banner instead when no row matches ``key``.
    """
    # (label for the original/replaced sentence, label for the context field).
    # The rest of the page uses the lowercase 'paraphrase' label, while this
    # function used to test only for 'Paraphrase' -- which dropped the labels
    # and the ':' colour prefix for the lowercase spelling. Accept both.
    labels_by_task = {
        'NLI': ('hypothesis', 'premise'),
        'QA': ('question', 'text'),
        'paraphrase': ('sentence 1', 'sentence 2'),
        'Paraphrase': ('sentence 1', 'sentence 2'),
    }
    sentence_label, context_label = labels_by_task.get(key, ('sentence', 'context'))

    new_dataframe = dataframe.loc[dataframe['tasks'] == key].reset_index(drop=True)
    if new_dataframe.empty:
        st.error(f'We did not find any Secret Language of {title} on {key}.')
        return

    # Literal replacement; regex=False pins the behaviour independently of the
    # pandas version's default.
    new_dataframe['replaced sentences'] = new_dataframe['replaced sentences'].str.replace(
        '<s>', '[POS]', regex=False)

    for i in range(len(new_dataframe)):
        # '\\.' keeps Markdown from treating "1." as a list item; written with
        # a doubled backslash because '\.' is an invalid escape sequence.
        _title = f'{i + 1}\\. **[{new_dataframe.loc[i, "secret languages"]}]**'
        with st.expander(_title):
            # Colons inside the sentences are replaced by "[colon]" so they do
            # not interfere with the ":blue[...]" / ":red[...]" colour markup.
            _string = (
                f'Original {sentence_label}: :'
                + f'blue[{new_dataframe.loc[i, "original sentences"]}]'.replace(":", "[colon]")
                + f'<br>Replaced {sentence_label}: :'
                + f'red[{new_dataframe.loc[i, "replaced sentences"]}]'.replace(":", "[colon]")
                + f'<br>{context_label}: :'
                + f'blue[{new_dataframe.loc[i, "premise / sentence 2 / text"]}]'.replace(":", "[colon]")
            )
            st.markdown(_string, unsafe_allow_html=True)
    st.markdown(hide_expander_border, unsafe_allow_html=True)
def present_dict(_dict, task):
    """Render every secret language found on ``task`` together with its examples.

    Rows of ``_dict`` whose task equals ``task`` are grouped by secret
    language. Each group is written as one Markdown element: a numbered,
    anchor-carrying heading followed by an ordered list of
    original / replaced / context triples. The anchor id is
    ``<escaped secret language>_<task>``, matching the links produced by
    ``present_sl_task``.

    Args:
        _dict: Column-oriented mapping with the parallel sequences ``tasks``,
            ``secret languages``, ``original sentences``,
            ``replaced sentences`` and ``premise / sentence 2 / text``.
        task: Task label to present (``'NLI'``, ``'QA'`` or ``'paraphrase'``).

    Reads the module-level ``title`` and ``hide_expander_border``. Shows an
    error banner when no row belongs to ``task``.
    """
    # (label for the original/replaced sentence, label for the context field)
    labels_by_task = {
        'NLI': ('hypothesis', 'premise'),
        'QA': ('question', 'text'),
        'paraphrase': ('sentence 1', 'sentence 2'),
    }
    sentence_label, context_label = labels_by_task.get(task, ('sentence', 'context'))

    # Same character escaping as in present_sl_task, defined locally instead
    # of relying on the module-level `special`. The replacement texts contain
    # none of the replaced characters, so a single translate() pass is
    # equivalent to the former chain of str.replace() calls.
    anchor_escapes = str.maketrans({
        '(': ',,',
        ')': '..',
        '[': ',,,',
        ']': '...',
        '"': '././',
    })

    # Group the examples by secret language. A plain dict with setdefault()
    # replaces the former defaultdict(int) + type(...) == int sentinel check.
    examples_by_sl = {}
    rows = zip(
        _dict['tasks'],
        _dict['secret languages'],
        _dict['original sentences'],
        _dict['replaced sentences'],
        _dict['premise / sentence 2 / text'],
    )
    for row_task, secret, original, replaced, context in rows:
        if row_task != task:
            continue
        if task == 'QA':
            # QA examples are shown without the `<s>` marker.
            replaced = replaced.replace('<s>', '')
        examples_by_sl.setdefault(secret, []).append((original, replaced, context))

    if not examples_by_sl:
        st.error(f'We did not find any Secret Language of {title} on {task}.', icon="⚠️")
        return

    for number, secret in enumerate(sorted(examples_by_sl), start=1):
        anchor = secret.strip().translate(anchor_escapes)
        parts = [
            f'{number}. <span id="{anchor}_{task}"> **:red[{secret}]**</span>',
            '<br>Examples:<br>',
            '<blockquote><ol>',
        ]
        for original, replaced, context in examples_by_sl[secret]:
            # Colons inside the sentences are replaced by "[colon]" so they do
            # not interfere with the ":blue[...]" / ":red[...]" colour markup.
            parts.append(
                f'<li> **Original {sentence_label}**: :'
                + f'blue[{original}]'.replace(":", "[colon]")
                + f'<br> **Replaced {sentence_label}**: :'
                + f'red[{replaced}]'.replace(":", "[colon]")
                + f'<br> **{context_label}**: :'
                + f'blue[{context}]'.replace(":", "[colon]")
                + '<br></li>'
            )
        parts.append('</ol></blockquote>')

        # Backslash-escape `<s>`, `$` and `~`, presumably so Markdown shows
        # them literally. '\\' is used because '\<', '\$' and '\~' are invalid
        # escape sequences; the runtime strings are unchanged. The escaping
        # also applies to the span id, as it does to the link targets in
        # present_sl_task.
        markup = ''.join(parts)
        markup = markup.replace('<s>', '\\<s\\>').replace('$', '\\$').replace('~', '\\~')
        st.markdown(markup, unsafe_allow_html=True)

    st.markdown(hide_expander_border, unsafe_allow_html=True)
# Fill the three tabs; the order matches the captions given to st.tabs().
for _tab, _tab_task in zip((tab1, tab2, tab3), ('NLI', 'QA', 'paraphrase')):
    with _tab:
        present_dict(data_title, _tab_task)

# Footnote targeted by the "(#jump)" link in the "meaning in English" heading.
# The superscript marker was mis-encoded and "English" was misspelled in the
# displayed text; both are corrected here.
st.markdown(
    '<span id="jump">¹</span>*English meaning is supported by [PyDictionary](https://pypi.org/project/PyDictionary/).*',
    unsafe_allow_html=True)
else:
# Counterpart of the `if title in datas:` check: the searched word is not a
# key of the data loaded from the pickle file, so only an error banner is shown.
st.error(f'{title} is not in the dictionary of Secret Language.', icon="⚠️")