"""Streamlit page for browsing the "Secret Language" dictionary.

The page reads a search word from the sidebar, loads the pickle shard chosen
from the word's first character, and renders the word's English meaning, the
secret languages found for it per task (NLI / QA / Paraphrase), and example
sentences for every secret language.

NOTE(review): this file was recovered from a copy whose layout and HTML-like
tokens had been lost.  The following pieces are reconstructions and should be
confirmed against the deployed version:
  * ``<br>`` / ``<ol>`` / ``<li>`` tags inside Markdown strings (the recovered
    text only had raw line breaks at these positions);
  * the ``<s>`` token in ``_BOS_TOKEN`` (the recovered text had an empty
    string there);
  * the content of ``hide_expander_border`` (recovered as a single space,
    presumably a stripped ``<style>`` block);
  * the nesting of the session-state purge loop and of the
    ``hide_expander_border`` calls.
"""
import os
import pickle
from collections import defaultdict, Counter

import pandas as pd
import streamlit as st
from PyDictionary import PyDictionary
from streamlit_extras.colored_header import colored_header

dictionary = PyDictionary()
# Must stay the first Streamlit command executed by the script.
st.set_page_config(layout="wide", page_title="ACl23 Secret Language")

# NOTE(review): presumably held a <style> snippet originally - TODO confirm.
hide_expander_border = """ """

# NOTE(review): reconstructed; assumed to be the tokenizer start token, which
# must be escaped so it is not rendered as an HTML strikethrough tag.
_BOS_TOKEN = '<s>'

# Directory name is spelled exactly as on disk; do not "fix" the typos here.
_SHARD_DIR = 'all_secret_langauge_by_fist'

# Task-specific wording for the edited sentence and for its context column.
_SENTENCE_LABELS = {'NLI': 'hypothesis', 'QA': 'question', 'Paraphrase': 'sentence 1'}
_CONTEXT_LABELS = {'NLI': 'Premise', 'QA': 'Text', 'Paraphrase': 'Sentence 2'}


def _task_label(table, task, template):
    """Return ``template`` filled with the label of ``task``, or '' if the task is unknown."""
    label = table.get(task)
    return '' if label is None else template.format(label)


def _colon_safe(text):
    """Replace ':' so sentence text cannot open a ``:color[...]`` Markdown directive."""
    return text.replace(":", "[colon]")


def _escape_markdown(text):
    """Backslash-escape the start token, '$' and '~' before handing text to ``st.markdown``."""
    return text.replace(_BOS_TOKEN, '\\<s\\>').replace('$', '\\$').replace('~', '\\~')


def _shard_file_name(word):
    """Pick the pickle shard for ``word`` from the code point of its first character.

    NOTE(review): the upper bounds are exclusive, so words starting with '9',
    'z' or 'Z' fall through to ``other_dict.pkl``.  This looks like an
    off-by-one, but it is kept because the shards may have been written with
    the same rule - verify against the shard builder before changing it.
    """
    code = ord(word[0])
    if 48 <= code < 57:
        return 'num_dict.pkl'
    if 97 <= code < 122 or 65 <= code < 90:
        return f'{code}_dict.pkl'
    return 'other_dict.pkl'


def present_sl_task(secret_language_by_task, task):
    """Render an expander listing the secret languages found for ``task``.

    Args:
        secret_language_by_task: mapping from task name to a collection of
            secret-language strings.
        task: key of ``secret_language_by_task`` to display.

    Reads the module-level ``title`` for the expander label.
    """
    # Empty entries are dropped *before* stripping, as in the original flow.
    all_sl = sorted(
        sl.replace("\n", "/n").strip()
        for sl in sorted(secret_language_by_task[task])
        if sl != ''
    )
    label = (f'***{len(all_sl)}*** secret languages of ***{title}*** on '
             f'{task.replace("paraphrase", "Paraphrase")}')
    with st.expander(label):
        st.markdown(_escape_markdown(', '.join(all_sl)), unsafe_allow_html=True)


def present_dataframe(dataframe, key, title):
    """Render one expander per row of ``dataframe`` whose ``tasks`` column equals ``key``.

    Currently not called anywhere in this script; kept for callers that still
    work with a DataFrame instead of the raw dictionary.

    Args:
        dataframe: frame with the columns ``tasks``, ``secret languages``,
            ``original sentences``, ``replaced sentences`` and
            ``premise / sentence 2 / text``.
        key: task name to filter on.
        title: searched word, used only in the "nothing found" message.
    """
    new_dataframe = dataframe.loc[dataframe['tasks'] == key].reset_index()
    new_dataframe['replaced sentences'] = new_dataframe['replaced sentences'].str.replace(
        _BOS_TOKEN, '[POS]')
    if not len(new_dataframe):
        st.error(f'We did not find any Secret Language of {title} on {key}.')
        return

    new_dataframe = new_dataframe.drop(columns=['tasks', 'index'])
    sentence_label = _task_label(_SENTENCE_LABELS, key, '{}: :')
    context_label = _task_label(_CONTEXT_LABELS, key, '<br>{}: :')
    for i in range(len(new_dataframe)):
        _title = f'{i + 1}\\. **[{new_dataframe.loc[i, "secret languages"]}]**'
        with st.expander(_title):
            parts = [
                'Original ', sentence_label,
                _colon_safe(f'blue[{new_dataframe.loc[i, "original sentences"]}]'),
                '<br>Replaced ', sentence_label,
                _colon_safe(f'red[{new_dataframe.loc[i, "replaced sentences"]}]'),
                context_label,
                _colon_safe(f'blue[{new_dataframe.loc[i, "premise / sentence 2 / text"]}]'),
            ]
            st.markdown(''.join(parts), unsafe_allow_html=True)
    st.markdown(hide_expander_border, unsafe_allow_html=True)


def present_dict(_dict, task):
    """Render every secret language of ``task`` together with its example sentences.

    Args:
        _dict: column-oriented record of the searched word with the parallel
            sequences ``secret languages``, ``tasks``, ``original sentences``,
            ``replaced sentences``, ``premise / sentence 2 / text`` and
            ``output``.
        task: task name whose rows are displayed.

    Reads the module-level ``title`` for the "nothing found" message.
    """
    # Group the examples of this task by secret language.
    examples_by_sl = defaultdict(list)
    rows = zip(
        _dict['secret languages'],
        _dict['tasks'],
        _dict['original sentences'],
        _dict['replaced sentences'],
        _dict['premise / sentence 2 / text'],
        _dict['output'],
    )
    for _sl, row_task, original, replaced, context, output in rows:
        if row_task != task:
            continue
        # Drop the leading special token so only the sentence itself is shown.
        if task == 'QA':
            replaced = replaced.replace(_BOS_TOKEN, '')
        else:
            replaced = replaced.replace('[CLS]', '', 1)
        examples_by_sl[_sl].append((original, replaced, context, output))

    if not examples_by_sl:
        st.error(f'We did not find any Secret Language of {title} on {task}.', icon="⚠️")
        return

    sentence_label = _task_label(_SENTENCE_LABELS, task, '{}**: :')
    context_label = _task_label(_CONTEXT_LABELS, task, '<br>**{}**: :')
    for i, _sl in enumerate(sorted(examples_by_sl)):
        shown_sl = _sl.replace("\n", "/n")
        parts = [f'{i + 1}. **:red[{shown_sl}]**', '<br>Examples:<br>', '<ol>']
        for original, replaced, context, output in examples_by_sl[_sl]:
            replaced_md = f'red[{replaced}]'.replace('/', '\\').replace(_sl, shown_sl)
            parts += [
                '<li>**Original ', sentence_label,
                _colon_safe(f'blue[{original}]'),
                '<br>**Replaced ', sentence_label,
                _colon_safe(replaced_md),
                context_label,
                _colon_safe(f'blue[{context}]'),
                "<br>**Model's prediction:** :",
                _colon_safe(f'blue[{output}]'),
                '</li>',
            ]
        parts.append('</ol>')
        st.markdown(_escape_markdown(''.join(parts)), unsafe_allow_html=True)
    st.markdown(hide_expander_border, unsafe_allow_html=True)


# ----------------------------------------------------------------- sidebar --
st.sidebar.header("📙 Dictionary")
_data = st.experimental_get_query_params()
default_title = 'Asian'
if _data and 'word' in _data:
    default_title = _data['word'][0]
if 'click_word' in st.session_state:
    default_title = st.session_state.click_word
# Snapshot the keys first: deleting while iterating the live view is unsafe.
for key in list(st.session_state.keys()):
    if key != 'click_word':
        del st.session_state[key]

title = st.sidebar.text_input(
    ":red[Search secret languages given the following word (case-sensitive)]",
    default_title)

st.sidebar.markdown("### Frequent FAQs")
st.sidebar.markdown(
    "1. :blue[*Why are words in sentences represented as subwords instead of complete words?*]<br>"
    "The tokenizer we use is from DistillBERT, ALBERT, or Roberta, which tokenizes sentences into subwords. As a result, the word being replaced in a sentence might be a subword (such as `rain` in `rainforest`).",
    unsafe_allow_html=True)
st.sidebar.markdown(
    "2. :blue[*This page is extremely slow. I cannot stand it.*]<br>"
    "We apologize for the slow performance of this page. We are actively working on improving it. "
    "As loading the data can take time and some words have many secret languages, this page needs time to process.",
    unsafe_allow_html=True)
st.sidebar.markdown(
    "3. :blue[*Why are some examples significantly different from the original sentences?*]<br>"
    "As per our submission, we replace 1 to 10 subwords in a sentence. However, for some examples with short lengths, the entire sentence may be altered. We are conducting experiments and will present examples where only a single subword has been changed.",
    unsafe_allow_html=True)

# --------------------------------------------------------------- main page --
if not title:
    # An empty search box has no first character to select a shard with.
    st.error('Please enter a word to search.', icon="⚠️")
    st.stop()

# SECURITY: pickle executes arbitrary code on load; this is acceptable only
# because the shards are shipped with the app and never user-supplied.
with open(f'{_SHARD_DIR}/{_shard_file_name(title)}', 'rb') as shard:
    datas = pickle.load(shard)

if title in datas:
    st.title(title)
    colored_header(
        label=f"{title}'s meaning in English[¹](#jump)",
        description="**English meaning is supported by [PyDictionary](https://pypi.org/project/PyDictionary/)*",
        color_name="violet-70",
    )
    # Write the meaning of the input word.  Iterating the lookup result fails
    # (and lands in the except branch) when PyDictionary returns nothing.
    try:
        title_mean = dictionary.meaning(title)
        meaning_md = '>' + ''.join(
            f':violet[{pos}]: {"; ".join(title_mean[pos])}<br>' for pos in title_mean)
    except Exception:
        st.error(
            f'We cannot find the meaning of {title} in English (PyDictionary), which might be due to the bug.',
            icon="🚨")
    else:
        st.markdown(meaning_md, unsafe_allow_html=True)

    data_title = datas[title]
    title_secret_languages = sorted(set(data_title["secret languages"]))
    colored_header(
        label=f"{title}'s secret languages",
        description=f'Overall, we found :red[{len(title_secret_languages)}] secret languages of :blue[{title}].',
        color_name="red-70",
    )

    # Unique secret languages per task; a task outside these three raises
    # KeyError, exactly as before.
    secret_language_by_task = {
        'QA': [],
        'NLI': [],
        'Paraphrase': [],
    }
    for _sl, _task in zip(data_title['secret languages'], data_title['tasks']):
        secret_language_by_task[_task].append(_sl)
    for k in secret_language_by_task:
        secret_language_by_task[k] = list(set(secret_language_by_task[k]))

    present_sl_task(secret_language_by_task, 'NLI')
    present_sl_task(secret_language_by_task, 'QA')
    present_sl_task(secret_language_by_task, 'Paraphrase')
    st.caption(
        "\\**Hyperlinks only function when the corresponding tab is open. "
        "For example, the hyperlinks in the paraphrase section will only work when the paraphrase tab is open. However, due to the container property of Hugging Face Space, the hyperlinks might be not able to function.*")
    st.caption('\\**Due to the grammatical properties of HTML, the layout of this page may vary.*')

    colored_header(
        label="Examples of replaced sentences",
        description='**The number following the tasks represents the number of examples found for a particular task, which may be different from the number of secret languages.*',
        color_name="orange-70",
    )
    _num = Counter(data_title['tasks'])
    tab1, tab2, tab3 = st.tabs(
        [f'NLI ({_num["NLI"]})', f'QA ({_num["QA"]})', f'Paraphrase ({_num["Paraphrase"]})'])

    with tab1:
        present_dict(data_title, 'NLI')
    with tab2:
        present_dict(data_title, 'QA')
    with tab3:
        present_dict(data_title, 'Paraphrase')
else:
    st.error(f'{title} is not in the dictionary of Secret Language.', icon="⚠️")