Spaces:

anonauthors
/

SecretLanguage

Runtime error

App Files Files Community

SecretLanguage / pages /0_📙_Dictionary_(Search).py

anonymousauthors

Upload 5 files

701f8ff over 1 year ago

raw

history blame

No virus

10.6 kB

	import streamlit as st
	import pandas as pd
	import gdown
	import os
	import pickle
	from collections import defaultdict, Counter

	from PyDictionary import PyDictionary

	# PyDictionary client; queried further down for the English meaning of the
	# searched word.
	dictionary = PyDictionary()

	st.set_page_config(layout="wide", page_title="ACl23 Secret Language")

	# CSS rule that strips the border from elements with class ``st-bd``; the
	# result renderers inject it through ``st.markdown(..., unsafe_allow_html=True)``.
	# Assembled from explicit "\n" pieces so the value never depends on how
	# this file happens to be indented.
	hide_expander_border = (
	    "\n"
	    "<style>\n"
	    ".st-bd {border-style: none;}\n"
	    "</style>\n"
	)

	# sidebar
	st.sidebar.header("📙 Dictionary")
	# NOTE(review): this is an experimental Streamlit API -- confirm it still
	# exists in the Streamlit version the Space is deployed with.
	_data = st.experimental_get_query_params()
	default_title = 'Asian'
	# The URL may carry unrelated parameters or an empty ``?word=``; only a
	# non-empty ``word`` value overrides the default search term.
	_query_words = _data.get('word')
	if _query_words and _query_words[0]:
	    default_title = _query_words[0]

	title = st.sidebar.text_input(":red[Search secret languages given the following word (case-sensitive)]", default_title)

	# An emptied search box would make ``title[0]`` raise IndexError, so stop
	# this script run with a message instead.
	if not title:
	    st.error('Please enter a word to search.', icon="⚠️")
	    st.stop()

	# Entries are sharded into pickle files by the code point of the word's
	# first character.
	# NOTE(review): the half-open ranges below leave out '9' (57), 'z' (122) and
	# 'Z' (90), which therefore resolve to other_dict.pkl. This looks like an
	# off-by-one, but it must match however the shard files were generated --
	# confirm against the data-preparation code before changing the bounds.
	_first_code = ord(title[0])
	if 48 <= _first_code < 57:
	    file_name = 'num_dict.pkl'
	elif 97 <= _first_code < 122 or 65 <= _first_code < 90:
	    file_name = f'{_first_code}_dict.pkl'
	else:
	    file_name = 'other_dict.pkl'

	# The shard is a local data file whose name comes from the fixed set above;
	# pickle must never be pointed at user-supplied files.
	with open(f'all_secret_langauge_by_fist/{file_name}', 'rb') as _shard_file:
	    datas = pickle.load(_shard_file)
	# Characters that are awkward inside an HTML id / markdown link target, and
	# the placeholder each is replaced with. Applied in this order.
	_ANCHOR_SUBSTITUTIONS = (
	    ("(", ",,"),
	    (")", ".."),
	    ("[", ",,,"),
	    ("]", "..."),
	    ('"', "././"),
	)

	# Wording used for the replaced sentence and for its companion text, keyed
	# by lower-cased task name.
	_TASK_LABELS = {
	    'nli': ('hypothesis', 'premise'),
	    'qa': ('question', 'text'),
	    'paraphrase': ('sentence 1', 'sentence 2'),
	}


	def _anchor_id(secret_language, task):
	    """Return the in-page anchor id for ``secret_language`` within ``task``.

	    Both the link lists and the link targets call this helper, so the two
	    sides cannot drift apart.
	    """
	    slug = secret_language.strip()
	    for char, placeholder in _ANCHOR_SUBSTITUTIONS:
	        slug = slug.replace(char, placeholder)
	    return f'{slug}_{task}'


	def _escape_markdown(text):
	    """Backslash-escape ``<s>``, ``$`` and ``~`` in ``text`` before rendering."""
	    return text.replace('<s>', '\\<s\\>').replace('$', '\\$').replace('~', '\\~')


	def _colored(color, text):
	    """Wrap ``text`` in a ``:color[...]`` directive.

	    Colons inside ``text`` are spelled ``[colon]`` so they cannot be
	    mistaken for directive syntax.
	    """
	    return ':' + f'{color}[{text}]'.replace(':', '[colon]')


	def _task_labels(task):
	    """Return ``(sentence label, companion label)`` for ``task``.

	    The lookup ignores case; an unrecognised task gets generic labels.
	    """
	    return _TASK_LABELS.get(task.lower(), ('sentence', 'context'))


	def present_sl_task(secret_language_by_task, task):
	    """Show the count and a linked list of the secret languages of one task.

	    Args:
	        secret_language_by_task: mapping from task name to an iterable of
	            secret-language strings.
	        task: task name to display ('NLI', 'QA' or 'paraphrase').

	    The searched word is read from the module-level ``title``.
	    """
	    all_sl = sorted(secret_language_by_task.get(task, []))
	    st.markdown(
	        f':red[{len(all_sl)}] secret languages of :blue[{title}] on {task.replace("paraphrase", "Paraphrase")}',
	        unsafe_allow_html=True)
	    links = ', '.join(f'[{sl}](#{_anchor_id(sl, task)})' for sl in all_sl)
	    st.markdown('>' + _escape_markdown(links), unsafe_allow_html=True)


	def present_dataframe(dataframe, key, title):
	    """Show each row of ``dataframe`` belonging to task ``key`` in an expander.

	    Not called by the page at the moment; kept as the DataFrame-based
	    alternative to ``present_dict``.

	    Args:
	        dataframe: pandas DataFrame with the columns 'tasks',
	            'secret languages', 'original sentences', 'replaced sentences'
	            and 'premise / sentence 2 / text'.
	        key: task name used to filter the 'tasks' column.
	        title: searched word, used only in the "nothing found" message.
	    """
	    new_dataframe = dataframe.loc[dataframe['tasks'] == key].reset_index()
	    new_dataframe['replaced sentences'] = new_dataframe['replaced sentences'].str.replace(
	        '<s>', '[POS]', regex=False)
	    if not len(new_dataframe):
	        st.error(f'We did not find any Secret Language of {title} on {key}.')
	        return

	    new_dataframe = new_dataframe.drop(columns=['tasks', 'index'])
	    sentence_label, companion_label = _task_labels(key)
	    for i in range(len(new_dataframe)):
	        # The escaped dot keeps markdown from treating "1." as a list item.
	        with st.expander(f'{i + 1}\\. [{new_dataframe.loc[i, "secret languages"]}]'):
	            st.markdown(
	                f'Original {sentence_label}: '
	                + _colored('blue', new_dataframe.loc[i, "original sentences"])
	                + f'<br>Replaced {sentence_label}: '
	                + _colored('red', new_dataframe.loc[i, "replaced sentences"])
	                + f'<br>{companion_label}: '
	                + _colored('blue', new_dataframe.loc[i, "premise / sentence 2 / text"]),
	                unsafe_allow_html=True)
	    st.markdown(hide_expander_border, unsafe_allow_html=True)


	def present_dict(_dict, task):
	    """Render every secret language recorded for ``task`` with its examples.

	    Args:
	        _dict: column-oriented record of the searched word with parallel
	            sequences under 'secret languages', 'tasks',
	            'original sentences', 'replaced sentences' and
	            'premise / sentence 2 / text'.
	        task: task name to display ('NLI', 'QA' or 'paraphrase').

	    Each secret language becomes one numbered markdown block whose
	    ``<span id=...>`` is the target of the links built by
	    ``present_sl_task``. The searched word is read from the module-level
	    ``title``.
	    """
	    examples_by_sl = defaultdict(list)
	    rows = zip(
	        _dict['secret languages'],
	        _dict['tasks'],
	        _dict['original sentences'],
	        _dict['replaced sentences'],
	        _dict['premise / sentence 2 / text'],
	    )
	    for _sl, row_task, original, replaced, companion in rows:
	        if row_task != task:
	            continue
	        if task == 'QA':
	            # QA examples are shown without the "<s>" marker.
	            replaced = replaced.replace('<s>', '')
	        examples_by_sl[_sl].append((original, replaced, companion))

	    if not examples_by_sl:
	        st.error(f'We did not find any Secret Language of {title} on {task}.', icon="⚠️")
	        return

	    sentence_label, companion_label = _task_labels(task)
	    for number, _sl in enumerate(sorted(examples_by_sl), start=1):
	        parts = [
	            f'{number}. <span id="{_anchor_id(_sl, task)}"> :red[{_sl}]</span>',
	            '<br>Examples:<br>',
	            '<blockquote><ol>',
	        ]
	        for original, replaced, companion in examples_by_sl[_sl]:
	            parts.append(
	                f'<li> **Original {sentence_label}**: ' + _colored('blue', original)
	                + f'<br> **Replaced {sentence_label}**: ' + _colored('red', replaced)
	                + f'<br> {companion_label}: ' + _colored('blue', companion)
	                + '<br></li>')
	        parts.append('</ol></blockquote>')
	        st.markdown(_escape_markdown(''.join(parts)), unsafe_allow_html=True)
	    st.markdown(hide_expander_border, unsafe_allow_html=True)


	if title in datas:
	    st.title(title)
	    st.markdown(f"## {title}'s meaning in English[¹](#jump)")

	    # Look up the English meaning. Any failure, or an empty/None result, is
	    # reported to the user rather than aborting the page.
	    meaning_markdown = None
	    try:
	        title_mean = dictionary.meaning(title)
	        if title_mean:
	            meaning_markdown = '>' + ''.join(
	                f':violet[{key}]: {";".join(title_mean[key])}<br>' for key in title_mean)
	    except Exception:
	        meaning_markdown = None
	    if meaning_markdown is None:
	        st.error(f'We cannot find the meaning of {title} in English (PyDictionary), which might be due to the bug.', icon="🚨")
	    else:
	        st.markdown(meaning_markdown, unsafe_allow_html=True)

	    st.markdown("---")
	    st.markdown(f"## {title}'s secret languages")
	    data_title = datas[title]
	    title_secret_languages = sorted(set(data_title["secret languages"]))
	    st.markdown(f'Overall, we found :red[{len(title_secret_languages)}] secret languages of :blue[{title}].', unsafe_allow_html=True)

	    # Unique secret languages per task. The three displayed tasks are always
	    # present; any other task found in the data is grouped without error.
	    _unique_by_task = {'QA': set(), 'NLI': set(), 'paraphrase': set()}
	    for _sl, _sl_task in zip(data_title['secret languages'], data_title['tasks']):
	        _unique_by_task.setdefault(_sl_task, set()).add(_sl)
	    secret_language_by_task = {k: sorted(v) for k, v in _unique_by_task.items()}

	    present_sl_task(secret_language_by_task, 'NLI')
	    present_sl_task(secret_language_by_task, 'QA')
	    present_sl_task(secret_language_by_task, 'paraphrase')

	    st.markdown("*Hyperlinks only function when the corresponding tab is open. "
	                "For example, the hyperlinks in the paraphrase section will only work when the paraphrase tab is open.*")
	    st.markdown("---")
	    st.markdown("## Examples of replaced sentences")

	    # Tab captions carry the number of example rows per task.
	    _num = Counter(data_title['tasks'])
	    tab1, tab2, tab3 = st.tabs([f'NLI ({_num["NLI"]})', f'QA ({_num["QA"]})', f'Paraphrase ({_num["paraphrase"]})'])

	    with tab1:
	        present_dict(data_title, 'NLI')
	    with tab2:
	        present_dict(data_title, 'QA')
	    with tab3:
	        present_dict(data_title, 'paraphrase')

	    # Footnote targeted by the "¹" link in the meaning heading.
	    st.markdown(
	        '<span id="jump">¹</span>English meaning is supported by [PyDictionary](https://pypi.org/project/PyDictionary/).',
	        unsafe_allow_html=True)
	else:
	    st.error(f'{title} is not in the dictionary of Secret Language.', icon="⚠️")