taishi-i's picture
update indexes
f11c7c6
raw history blame
No virus
2.98 kB
import json
import streamlit as st
from pyserini.search.lucene import LuceneSearcher
class SearchApplication:
    """Streamlit page for searching the awesome-japanese-nlp-resources index.

    Instantiating the class renders the whole page: it configures the page,
    opens the Lucene index, draws the query box and the popular-word
    shortcut buttons, and finally lists the matching repositories.
    """

    # (button label, query issued when that button is pressed)
    _POPULAR_WORDS = (
        ("Python", "Python"),
        ("ChatGPT", "ChatGPT"),
        ("辞書", "辞書"),
        # NOTE(review): this label and its query differ, unlike the other
        # entries; kept exactly as before -- confirm this is intended.
        ("コーパス", "Corpus"),
    )

    def __init__(self):
        """Build the page top to bottom and run the search if requested."""
        self.title = "Awesome Japanese NLP resources search"
        # Page configuration is applied before any other Streamlit call.
        self.set_page_config()
        self.searcher = self.set_searcher()

        st.header(self.title)

        col1, col2 = st.columns(2)
        with col1:
            self.query = st.text_input(
                "Search English or Japanese words", value=""
            )
        with col2:
            # Empty heading used as a vertical spacer above the button.
            st.write("#")
            self.search_button = st.button("🔎")

        st.caption(
            "You can search for open-source software from [400+ Japanese NLP"
            " repositories](https://github.com/taishi-i/awesome-japanese-nlp-resources)."
        )
        st.write("#")

        # Popular-word buttons may overwrite self.query, so they are drawn
        # before the results are computed.
        self.show_popular_words()
        self.show_search_results()

    def set_page_config(self):
        """Apply the page title, icon and layout."""
        st.set_page_config(
            page_title=self.title,
            page_icon="😎",
            layout="centered",
        )

    def set_searcher(self):
        """Open the Lucene index at ``indexes/docs`` with language ``ja``.

        Returns:
            The configured ``LuceneSearcher`` instance.
        """
        searcher = LuceneSearcher("indexes/docs")
        searcher.set_language("ja")
        return searcher

    def show_popular_words(self):
        """Draw one shortcut button per popular word.

        Pressing a button replaces ``self.query`` with the query associated
        with that button.
        """
        st.caption("Popular words")
        # One extra column is requested and left empty, as in the original
        # five-column layout holding four buttons.
        columns = st.columns(len(self._POPULAR_WORDS) + 1)
        for column, (label, query) in zip(columns, self._POPULAR_WORDS):
            with column:
                if st.button(label):
                    self.query = query

    def show_search_results(self):
        """Run the search and render the hits, most-starred first.

        Nothing is rendered unless a query is set or the search button was
        pressed.
        """
        if not (self.query or self.search_button):
            return

        st.write("#")
        hits = self.searcher.search(self.query, k=500)
        st.write(f"{len(hits)} results")

        for record in self._rank_by_stars(hits):
            self._render_result(record)

    @staticmethod
    def _rank_by_stars(hits):
        """Decode each hit's raw JSON once and order records by star count.

        Args:
            hits: Iterable of objects whose ``raw`` attribute holds a JSON
                document string.

        Returns:
            A list of decoded dicts sorted by ``stargazers_count`` in
            descending order. Records with equal counts keep their search
            order. A missing or null count is treated as 0 instead of
            raising during the sort.
        """
        records = [json.loads(hit.raw) for hit in hits]
        return sorted(
            records,
            key=lambda record: record.get("stargazers_count") or 0,
            reverse=True,
        )

    @staticmethod
    def _render_result(record):
        """Render a single decoded search record.

        Args:
            record: Dict with the keys ``description``, ``url``,
                ``project_name``, ``main_topic`` and ``sub_topic``.
        """
        description = record["description"]
        url = record["url"]
        project_name = record["project_name"]
        main_topic = record["main_topic"]
        sub_topic = record["sub_topic"]

        st.write("---")
        st.subheader(f"[{project_name}]({url})")
        # Records without a description have nothing to render here.
        if description:
            st.markdown(description)

        if sub_topic is None:
            st.caption(f"{main_topic}")
        else:
            st.caption(f"{main_topic} / {sub_topic}")
        # Empty heading used as a vertical spacer after each result.
        st.write("#")
def main() -> None:
    """Entry point: constructing the application renders the page."""
    # All rendering happens inside the constructor, so the instance itself
    # is not kept.
    SearchApplication()


# Render the page only when this file is executed as the main script.
if __name__ == "__main__":
    main()