Spaces:

taishi-i
/

awesome-japanese-nlp-resources-search

Running

App Files Files Community

awesome-japanese-nlp-resources-search / app.py

taishi-i

update index files

338536e over 1 year ago

raw

history blame

2.95 kB

	import json

	import streamlit as st
	from pyserini.search.lucene import LuceneSearcher


	class SearchApplication:
	    """Streamlit page for searching the Japanese NLP resources index.

	    Constructing an instance renders the whole page: it applies the page
	    configuration, opens the Lucene index, draws the query box and the
	    shortcut buttons, and lists the results for the current query.
	    """

	    # (button label, query submitted when that button is pressed).
	    # NOTE(review): the "コーパス" button submits the English word "Corpus"
	    # instead of its own label; kept unchanged -- confirm it is intentional.
	    _POPULAR_WORDS = (
	        ("Python", "Python"),
	        ("ChatGPT", "ChatGPT"),
	        ("辞書", "辞書"),
	        ("コーパス", "Corpus"),
	    )

	    def __init__(self):
	        """Draw the page from top to bottom and show the search results."""
	        self.title = "Awesome Japanese NLP resources search"

	        # Page configuration is applied before anything is drawn.
	        self.set_page_config()
	        self.searcher = self.set_searcher()

	        st.header(self.title)
	        col1, col2 = st.columns(2)
	        with col1:
	            self.query = st.text_input(
	                "Search English or Japanese words", value=""
	            )

	        with col2:
	            # Spacer written above the button in the second column.
	            st.write("#")
	            self.search_button = st.button("🔎")

	        st.caption(
	            "You can search for open-source software from [400+ Japanese NLP"
	            " repositories](https://github.com/taishi-i/awesome-japanese-nlp-resources)."
	        )
	        st.write("#")

	        # The shortcut buttons may overwrite self.query, so they are drawn
	        # before the results are computed.
	        self.show_popular_words()
	        self.show_search_results()

	    def set_page_config(self):
	        """Apply the page title, icon and layout."""
	        st.set_page_config(
	            page_title=self.title,
	            page_icon="😎",
	            layout="centered",
	        )

	    def set_searcher(self):
	        """Open the index under ``indexes/docs`` with the language set to "ja".

	        Returns:
	            The configured ``LuceneSearcher`` instance.
	        """
	        searcher = LuceneSearcher("indexes/docs")
	        searcher.set_language("ja")
	        return searcher

	    def show_popular_words(self):
	        """Draw the shortcut buttons; a pressed button replaces ``self.query``."""
	        st.caption("Popular words")

	        # One column per button plus a trailing empty column as a spacer.
	        columns = st.columns(len(self._POPULAR_WORDS) + 1)
	        for column, (label, query) in zip(columns, self._POPULAR_WORDS):
	            with column:
	                if st.button(label):
	                    self.query = query

	    @staticmethod
	    def _rank_by_stars(search_results):
	        """Decode each hit's raw JSON once and order by stars, highest first.

	        Args:
	            search_results: Iterable of hits exposing a JSON string in ``raw``.

	        Returns:
	            A list of decoded documents sorted by ``stargazers_count`` in
	            descending order; hits with equal counts keep their input order.
	        """
	        docs = [json.loads(hit.raw) for hit in search_results]
	        docs.sort(key=lambda doc: doc["stargazers_count"], reverse=True)
	        return docs

	    def show_search_results(self):
	        """Run the current query and list the matches, most-starred first.

	        Nothing is drawn unless a query is present or the search button was
	        pressed on this run.
	        """
	        if not (self.query or self.search_button):
	            return

	        st.write("#")

	        # A blank query (search button pressed with an empty box) is reported
	        # as zero results without handing an empty string to the searcher.
	        query = self.query.strip()
	        hits = self.searcher.search(query, k=500) if query else []
	        st.write(f"{len(hits)} results")

	        for doc in self._rank_by_stars(hits):
	            description = doc["description"]
	            url = doc["url"]
	            project_name = doc["project_name"]
	            main_topic = doc["main_topic"]
	            sub_topic = doc["sub_topic"]

	            st.subheader(f"[{project_name}]({url})")
	            st.markdown(description)
	            if sub_topic is None:
	                st.caption(f"{main_topic}")
	            else:
	                st.caption(f"{main_topic} / {sub_topic}")
	            # Spacer between consecutive results.
	            st.write("#")


	def main() -> None:
	    """Entry point: constructing the application renders the page."""
	    SearchApplication()


	# Render the page only when this file is run as the main module.
	if __name__ == "__main__":
	    main()