"""Streamlit front end for searching a Lucene index of Japanese NLP resources."""

import json

import streamlit as st
from pyserini.search.lucene import LuceneSearcher


class SearchApplication:
    """Single-page search UI; constructing an instance renders the page.

    The constructor configures the page, opens the searcher, draws the query
    widgets and then renders the popular-word shortcuts and the result list.
    """

    # (button label, query submitted when the button is pressed)
    # NOTE(review): the last entry submits "Corpus" although its label is
    # "コーパス"; kept as in the original -- confirm this is intentional.
    _POPULAR_WORDS = (
        ("Python", "Python"),
        ("ChatGPT", "ChatGPT"),
        ("辞書", "辞書"),
        ("コーパス", "Corpus"),
    )

    def __init__(self):
        self.title = "Awesome Japanese NLP resources search"

        # Page config is applied before any other Streamlit call is made.
        self.set_page_config()
        self.searcher = self.set_searcher()

        st.header(self.title)

        input_column, button_column = st.columns(2)
        with input_column:
            self.query = st.text_input(
                "Search English or Japanese words", value=""
            )
        with button_column:
            # Empty heading used as a vertical spacer above the button.
            st.write("#")
            self.search_button = st.button("🔎")

        st.caption(
            "You can search for open-source software from [400+ Japanese NLP"
            " repositories](https://github.com/taishi-i/awesome-japanese-nlp-resources)."
        )
        st.write("#")

        # A pressed shortcut overwrites self.query, so the shortcuts must be
        # rendered before the results are computed.
        self.show_popular_words()
        self.show_search_results()

    def set_page_config(self) -> None:
        """Set the browser tab title, icon and page layout."""
        st.set_page_config(
            page_title=self.title,
            page_icon="😎",
            layout="centered",
        )

    def set_searcher(self) -> LuceneSearcher:
        """Open the index at ``indexes/docs`` and set its language to ``ja``.

        Returns:
            The configured ``LuceneSearcher``.
        """
        searcher = LuceneSearcher("indexes/docs")
        searcher.set_language("ja")
        return searcher

    def show_popular_words(self) -> None:
        """Render one shortcut button per popular word.

        Pressing a button replaces ``self.query`` with that entry's query
        string.
        """
        st.caption("Popular words")

        # One extra column is requested and left empty, as in the original
        # five-column layout; zip() stops after the last popular word.
        columns = st.columns(len(self._POPULAR_WORDS) + 1)
        for column, (label, query) in zip(columns, self._POPULAR_WORDS):
            with column:
                if st.button(label):
                    self.query = query

    def show_search_results(self) -> None:
        """Search for ``self.query`` and render the hits.

        Nothing is rendered unless a query is set or the search button was
        pressed. Up to 500 hits are requested, and they are displayed in
        descending order of their ``stargazers_count`` field.
        """
        if not (self.query or self.search_button):
            return

        st.write("#")

        hits = self.searcher.search(self.query, k=500)
        st.write(f"{len(hits)} results")

        # Parse every hit exactly once, then sort the parsed records. The sort
        # is stable, so ties keep the order returned by the searcher.
        records = [json.loads(hit.raw) for hit in hits]
        records.sort(key=lambda record: record["stargazers_count"], reverse=True)

        for record in records:
            description = record["description"]
            url = record["url"]
            project_name = record["project_name"]
            main_topic = record["main_topic"]
            sub_topic = record["sub_topic"]

            st.write("---")
            st.subheader(f"[{project_name}]({url})")
            st.markdown(description)

            if sub_topic is None:
                st.caption(f"{main_topic}")
            else:
                st.caption(f"{main_topic} / {sub_topic}")

            # Spacer after each entry.
            st.write("#")


def main():
    """Build the application, which renders the page as a side effect."""
    SearchApplication()


if __name__ == "__main__":
    main()