Spaces:

raynardj
/

x-language-search-ancient-with-modern-words

Sleeping

App Files Files Community

x-language-search-ancient-with-modern-words / app.py

raynardj

load from encoded

cfa2da9 over 2 years ago

raw

history blame contribute delete

No virus

2.08 kB

	import streamlit as st
	import pandas as pd
	from sentence_transformers import SentenceTransformer
	from forgebox.cosine import CosineSearch
	import numpy as np

	from PIL import Image
	# Illustration displayed at the top of the sidebar; the file is expected to
	# sit next to this script.
	image = Image.open('shiji.png')


	# Page title (Chinese): "Cross ancient/modern Chinese search: search the
	# Records of the Grand Historian using modern vernacular Chinese".
	st.markdown("""
	## 🍻 跨古/现代文搜索：用白话搜史记
	""")

	# Sidebar: illustration followed by an English description and links.
	# NOTE: the spellings "classicical" and "morden" below are part of the
	# target URLs, so they must stay exactly as they are.
	st.sidebar.image(image, use_column_width=True)
	st.sidebar.markdown("""
	Search《Records of the Grand Historian》 with modern Chinese
	### References
	* Model trained [here, please hit ⭐️](https://github.com/raynardj/yuan)
	* [Trained crossed language BERT](https://huggingface.co/raynardj/xlsearch-cross-lang-search-zh-vs-classicical-cn)
	### Related projects
	* Read more [ancient books(almost all) with a translator](https://huggingface.co/spaces/raynardj/duguwen-classical-chinese-to-morden-translate)
	* [Modern Chinese to classical Chinese translator](https://huggingface.co/spaces/raynardj/modern-chinese-to-ancient-translate-wenyanwen)
	""")

	# Model identifier handed to SentenceTransformer by load_encoder(). The
	# spelling matches the model URL linked in the sidebar; do not "correct" it.
	TAG = "raynardj/xlsearch-cross-lang-search-zh-vs-classicical-cn"

	@st.cache(allow_output_mutation=True)
	def load_encoder():
	    """Instantiate the SentenceTransformer named by ``TAG``.

	    A spinner naming the model is shown while the constructor runs. The
	    function is wrapped in ``st.cache(allow_output_mutation=True)``,
	    presumably so the model is built once instead of on every script rerun.

	    Returns:
	        The ``SentenceTransformer`` instance created from ``TAG``.
	    """
	    spinner_text = f"Loading Transformer:{TAG}"
	    with st.spinner(spinner_text):
	        return SentenceTransformer(TAG)


	# Module-level encoder used by search() to embed the query text.
	encoder = load_encoder()

	@st.cache(allow_output_mutation=True)
	def load_book():
	    """Read the book's sentences from ``grand_historian.csv``.

	    A spinner is shown while the CSV is parsed. Only the ``sentences``
	    column of the file is used.

	    Returns:
	        A plain ``list`` holding the values of the ``sentences`` column, in
	        file order.
	    """
	    with st.spinner(f"📚 Loading Book..."):
	        book = pd.read_csv("grand_historian.csv")
	        return list(book["sentences"])


	# Module-level sentence list; search() indexes into it by position.
	all_lines = load_book()

	@st.cache(allow_output_mutation=True)
	def encode_book():
	    """Build the ``CosineSearch`` index from the array stored in ``vec.npy``.

	    Despite the name and the spinner text, nothing is encoded here: the
	    array is loaded from disk with ``np.load`` and passed straight to
	    ``CosineSearch``. Presumably ``vec.npy`` holds one precomputed embedding
	    per entry of the book's sentence list -- TODO confirm against the
	    script that produced the file.

	    Returns:
	        The ``CosineSearch`` object wrapping the loaded array.
	    """
	    with st.spinner(f"Encoding sentences for book《Records of the Grand Historian》"):
	        return CosineSearch(np.load('vec.npy'))


	# Module-level search index; search() calls it with the encoded query.
	cosine = encode_book()

	def search(text, top_k=5):
	    """Return the first ``top_k`` book sentences ranked for ``text``.

	    The query is embedded with the module-level ``encoder``, handed to the
	    module-level ``cosine`` index, and the leading entries of the returned
	    ordering are used as positions into ``all_lines``.

	    Args:
	        text: Query string to search with.
	        top_k: Number of sentences to return. Defaults to 5, the value that
	            was previously hard-coded.

	    Returns:
	        A ``pandas.DataFrame`` with a single ``"sentence"`` column holding
	        the selected sentences in ranked order.
	    """
	    enc = encoder.encode(text)  # embed the search key
	    # `order` is used as positions into all_lines, i.e. it is an index
	    # ordering rather than an array of distances.
	    order = cosine(enc)
	    # Index the list directly: converting the whole book to a NumPy array on
	    # every query just to pick a handful of rows is wasted work.
	    hits = [all_lines[i] for i in order[:top_k]]
	    return pd.DataFrame({"sentence": hits})

	# Query box (label: "search in vernacular Chinese"). Surrounding whitespace
	# is stripped so that a blank or whitespace-only entry counts as empty
	# instead of triggering a meaningless search.
	keyword = st.text_input("用白话搜", "").strip()
	# The button (label: "search") is always rendered; a search runs only when
	# it was pressed and there is something to search for.
	if st.button("搜索") and keyword:
	    with st.spinner(f"🔍 Searching for {keyword}"):
	        df = search(keyword)
	        st.table(df)