"""Streamlit page that looks up a word in the "Secret Language" dictionary.

The page reads a word from the sidebar (or the ``word`` query parameter),
loads the pickle bucket chosen by the word's first character, and renders:

* the English meaning of the word (via PyDictionary),
* the secret languages found for the word, grouped by task, and
* example sentences for every secret language, one tab per task.

NOTE(review): this file was received with its line structure collapsed and
with HTML-like text stripped from several string literals.  Places where a
literal had to be reconstructed are marked with NOTE(review) below and should
be checked against the deployed version.
"""
import os
import pickle
from collections import Counter, defaultdict

import pandas as pd
import streamlit as st
from PyDictionary import PyDictionary

# import gdown

dictionary = PyDictionary()

st.set_page_config(layout="wide", page_title="ACl23 Secret Language")

# NOTE(review): this literal is blank in the file as received; it is rendered
# with unsafe_allow_html=True, so it presumably held a <style> block.
hide_expander_border = """ """

# NOTE(review): the token that is escaped / removed in the sentences was lost
# (the calls read `.replace('', '\')`, which is not valid Python).  '<s>' is a
# best guess -- confirm and correct it here, in this one place.
SPECIAL_TOKEN = '<s>'

# Characters that are rewritten when a secret language is turned into an
# in-page anchor id.  None of the replacement texts contains a character that
# is itself a key, so one translate() pass equals the original replace chain.
_ANCHOR_TABLE = str.maketrans({
    '(': ',,',
    ')': '..',
    '[': ',,,',
    ']': '...',
    '"': '././',
})


def _anchor(secret_language):
    """Return the anchor id fragment used to link to *secret_language*."""
    return secret_language.strip().translate(_ANCHOR_TABLE)


def _escape_markdown(text):
    """Backslash-escape SPECIAL_TOKEN, ``$`` and ``~`` in *text*."""
    return (
        text.replace(SPECIAL_TOKEN, '\\' + SPECIAL_TOKEN)
        .replace('$', '\\$')
        .replace('~', '\\~')
    )


def present_sl_task(secret_language_by_task, task):
    """Render the sorted, linked list of secret languages found for *task*.

    Args:
        secret_language_by_task: mapping of task name to an iterable of
            secret-language strings.
        task: the task whose secret languages are listed.

    Reads the module-level ``title`` (the searched word).
    """
    all_sl = sorted(secret_language_by_task[task])
    st.markdown(
        f':red[{len(all_sl)}] secret languages of :blue[{title}] on '
        f'{task.replace("paraphrase", "Paraphrase")}',
        unsafe_allow_html=True,
    )
    links = [f'[{sl}](#{_anchor(sl)}_{task})' for sl in all_sl]
    st.markdown('>' + _escape_markdown(', '.join(links)), unsafe_allow_html=True)


def present_dataframe(dataframe, key, title):
    """Render the rows of *dataframe* whose task is *key*, one expander each.

    NOTE(review): nothing in this file calls this function (its call sites
    are commented out in the original).  It compares *key* against
    'Paraphrase' while the data handled elsewhere in this file uses
    'paraphrase' -- confirm before reviving it.

    Args:
        dataframe: frame with the columns 'tasks', 'secret languages',
            'original sentences', 'replaced sentences' and
            'premise / sentence 2 / text'.
        key: task name used to filter the 'tasks' column.
        title: the searched word, used only in the "nothing found" message.
    """
    new_dataframe = dataframe.loc[dataframe['tasks'] == key].reset_index()
    new_dataframe['replaced sentences'] = (
        new_dataframe['replaced sentences'].str.replace(SPECIAL_TOKEN, '[POS]')
    )
    if not len(new_dataframe):
        st.error(f'We did not find any Secret Language of {title} on {key}.')
        return

    new_dataframe = new_dataframe.drop(columns=['tasks', 'index'])
    first_label = {
        'NLI': 'hypothesis: :',
        'QA': 'question: :',
        'Paraphrase': 'sentence 1: :',
    }.get(key, '')
    second_label = {
        'NLI': '\npremise: :',
        'QA': '\ntext: :',
        'Paraphrase': '\nsentence 2: :',
    }.get(key, '')

    for i in range(len(new_dataframe)):
        _title = f'{i + 1}\\. **[{new_dataframe.loc[i, "secret languages"]}]**'
        with st.expander(_title):
            # Colons inside the sentences are replaced so they cannot be
            # read as Streamlit colour directives (":blue[...]").
            _string = ''.join([
                'Original ',
                first_label,
                f'blue[{new_dataframe.loc[i, "original sentences"]}]'.replace(":", "[colon]"),
                '\nReplaced ',
                first_label,
                f'red[{new_dataframe.loc[i, "replaced sentences"]}]'.replace(":", "[colon]"),
                second_label,
                f'blue[{new_dataframe.loc[i, "premise / sentence 2 / text"]}]'.replace(":", "[colon]"),
            ])
            st.markdown(_string, unsafe_allow_html=True)
    st.markdown(hide_expander_border, unsafe_allow_html=True)


def present_dict(_dict, task):
    """Render every secret language of *task* together with its examples.

    Args:
        _dict: column-oriented record of the searched word with the keys
            'tasks', 'secret languages', 'original sentences',
            'replaced sentences' and 'premise / sentence 2 / text'
            (assumed to be equally long sequences -- TODO confirm).
        task: one of 'NLI', 'QA' or 'paraphrase'.

    Reads the module-level ``title`` and ``hide_expander_border``.
    """
    first_label = {
        'NLI': 'hypothesis**: :',
        'QA': 'question**: :',
        'paraphrase': 'sentence 1**: :',
    }.get(task, '')
    second_label = {
        'NLI': '\n    **premise**: :',
        'QA': '\n    **text**: :',
        'paraphrase': '\n    **sentence 2**: :',
    }.get(task, '')

    # secret language -> list of (original, replaced, premise/text) examples
    examples = {}
    rows = zip(
        _dict['tasks'],
        _dict['secret languages'],
        _dict['original sentences'],
        _dict['replaced sentences'],
        _dict['premise / sentence 2 / text'],
    )
    for row_task, secret_language, original, replaced, context in rows:
        if row_task != task:
            continue
        if task == 'QA':
            replaced = replaced.replace(SPECIAL_TOKEN, '')
        examples.setdefault(secret_language, []).append((original, replaced, context))

    if not examples:
        st.error(f'We did not find any Secret Language of {title} on {task}.', icon="⚠️")
        return

    for number, secret_language in enumerate(sorted(examples), start=1):
        # NOTE(review): the line breaks below appear as raw newlines inside
        # the literals in the file as received; they are reproduced as "\n".
        # The original markup (possibly <br> / list tags) could not be
        # recovered, including the odd trailing "2. " item.
        parts = [f'**:red[{secret_language}]**', '\nExamples:\n', '\n    ']
        for original, replaced, context in examples[secret_language]:
            parts += [
                '\n  1. **Original ',
                first_label,
                f'blue[{original}]'.replace(":", "[colon]"),
                '\n    **Replaced ',
                first_label,
                f'red[{replaced}]'.replace(":", "[colon]"),
                second_label,
                f'blue[{context}]'.replace(":", "[colon]"),
                '\n  2. ',
            ]
        parts.append('\n')

        # NOTE(review): the original computed this anchor id but never
        # emitted it, leaving the links written by present_sl_task without a
        # target.  An empty span restores the target without changing the
        # visible text -- confirm against the deployed page.
        target = f'<span id="{_anchor(secret_language)}_{task}"></span>'
        st.markdown(
            f'{number}. {target}' + _escape_markdown(''.join(parts)),
            unsafe_allow_html=True,
        )
    st.markdown(hide_expander_border, unsafe_allow_html=True)


# ---------------------------------------------------------------- sidebar
st.sidebar.header("📙 Dictionary")
_data = st.experimental_get_query_params()
default_title = _data.get('word', ['Asian'])[0]

title = st.sidebar.text_input(
    ":red[Search secret languages given the following word (case-sensitive)]",
    default_title,
)

# An empty search box would make title[0] raise IndexError below.
if not title:
    st.warning('Please enter a word to search.')
    st.stop()

# Pick the pickle bucket from the first character of the word.
# NOTE(review): the upper bounds are exclusive, exactly as in the original
# (range(48, 57), range(97, 122), range(65, 90)), so '9', 'z' and 'Z' go to
# 'other_dict.pkl'.  This looks like an off-by-one, but the pickle files may
# have been bucketed with the same bounds -- verify before changing it.
first_code = ord(title[0])
if 48 <= first_code < 57:
    file_name = 'num_dict.pkl'
elif 97 <= first_code < 122 or 65 <= first_code < 90:
    file_name = f'{first_code}_dict.pkl'
else:
    file_name = 'other_dict.pkl'

# pickle must only ever be used on trusted files; the name opened here is one
# of a fixed set of local file names derived from a single character code.
with open(f'all_secret_langauge_by_fist/{file_name}', 'rb') as _bucket:
    datas = pickle.load(_bucket)

# ------------------------------------------------------------------- page
if title in datas:
    st.title(title)
    st.markdown(f"## {title}'s meaning in English[¹](#jump)")

    # Write the meaning of the input word.  The lookup is best-effort:
    # PyDictionary needs network access and may yield nothing for a word, in
    # which case the loop below fails and the error box is shown instead.
    try:
        title_mean = dictionary.meaning(title)
        _string = '>' + ''.join(
            f':violet[{key}]: {";".join(title_mean[key])}\n' for key in title_mean
        )
        st.markdown(_string, unsafe_allow_html=True)
    except Exception:
        st.error(
            f'We cannot find the meaning of {title} in English (PyDictionary), which might be due to the bug.',
            icon="🚨",
        )

    st.markdown(f"---")
    st.markdown(f"## {title}'s secret languages")
    data_title = datas[title]
    title_secret_languages = sorted(set(data_title["secret languages"]))
    st.markdown(
        f'Overall, we found :red[{len(title_secret_languages)}] secret languages of :blue[{title}].',
        unsafe_allow_html=True,
    )

    # Distinct secret languages per task (a KeyError here means the data
    # contains a task other than the three known ones).
    secret_language_by_task = {
        'QA': set(),
        'NLI': set(),
        'paraphrase': set(),
    }
    for _task, _secret_language in zip(data_title['tasks'], data_title['secret languages']):
        secret_language_by_task[_task].add(_secret_language)
    secret_language_by_task = {k: list(v) for k, v in secret_language_by_task.items()}

    present_sl_task(secret_language_by_task, 'NLI')
    present_sl_task(secret_language_by_task, 'QA')
    present_sl_task(secret_language_by_task, 'paraphrase')
    st.markdown(
        f"*Hyperlinks only function when the corresponding tab is open. "
        f"For example, the hyperlinks in the paraphrase section will only work when the paraphrase tab is open.*"
    )

    st.markdown(f"---")
    st.markdown(f"## Examples of replaced sentences")

    _num = Counter(data_title['tasks'])
    tab1, tab2, tab3 = st.tabs([
        f'NLI ({_num["NLI"]})',
        f'QA ({_num["QA"]})',
        f'Paraphrase ({_num["paraphrase"]})',
    ])

    with tab1:
        present_dict(data_title, 'NLI')
    with tab2:
        present_dict(data_title, 'QA')
    with tab3:
        present_dict(data_title, 'paraphrase')

    st.markdown(
        f'¹*Enlish meaning is supported by [PyDictionary](https://pypi.org/project/PyDictionary/).*',
        unsafe_allow_html=True,
    )
else:
    st.error(f'{title} is not in the dictionary of Secret Language.', icon="⚠️")