|
import json |
|
|
|
import streamlit as st |
|
from pyserini.search.lucene import LuceneSearcher |
|
|
|
|
|
class SearchApplication:
    """Streamlit page that searches a Lucene index of Japanese NLP repositories.

    Instantiating the class renders the complete page: the page configuration,
    the header, the query box with its search button, the "popular words"
    shortcut buttons and, when a query is active, the list of matching
    repositories ordered by their ``stargazers_count``.

    Attributes set during construction:
        title: Text used for both the browser tab title and the page header.
        searcher: ``LuceneSearcher`` opened on ``INDEX_DIR``.
        query: Current query string (typed text or a popular-word shortcut).
        search_button: ``True`` when the search button was pressed on this run.
    """

    # Location of the prebuilt Lucene index, relative to the working directory.
    INDEX_DIR = "indexes/docs"

    # Upper bound on the number of hits requested from the searcher.
    MAX_RESULTS = 500

    # (button label, query issued when the button is pressed).
    # NOTE(review): the "コーパス" button issues the English query "Corpus";
    # this is preserved as-is -- confirm that the mismatch is intentional.
    POPULAR_WORDS = (
        ("Python", "Python"),
        ("ChatGPT", "ChatGPT"),
        ("辞書", "辞書"),
        ("コーパス", "Corpus"),
    )

    def __init__(self):
        """Render the whole page, top to bottom."""
        self.title = "Awesome Japanese NLP resources search"

        # The page configuration is applied before any other Streamlit call.
        self.set_page_config()
        self.searcher = self.set_searcher()

        st.header(self.title)
        col1, col2 = st.columns(2)
        with col1:
            self.query = st.text_input(
                "Search English or Japanese words", value=""
            )

        with col2:
            # Presumably a vertical spacer that lines the button up with the
            # text input next to it.
            st.write("#")
            self.search_button = st.button("🔎")

        st.caption(
            "You can search for open-source software from [500+ Japanese NLP"
            " repositories](https://github.com/taishi-i/awesome-japanese-nlp-resources)."
        )
        st.write("#")

        # The shortcuts may overwrite ``self.query``, so they are evaluated
        # before the results are rendered.
        self.show_popular_words()
        self.show_search_results()

    def set_page_config(self):
        """Set the browser tab title, the page icon and the centered layout."""
        st.set_page_config(
            page_title=self.title,
            page_icon="😎",
            layout="centered",
        )

    def set_searcher(self):
        """Open the Lucene index and return a searcher set to language "ja".

        Returns:
            The ``LuceneSearcher`` instance opened on ``INDEX_DIR``.
        """
        searcher = LuceneSearcher(self.INDEX_DIR)
        searcher.set_language("ja")
        return searcher

    def show_popular_words(self):
        """Render one shortcut button per popular word.

        Pressing a button replaces ``self.query`` with the query associated
        with that button for the current run.
        """
        st.caption("Popular words")

        # One extra, unused column keeps the buttons packed to the left.
        columns = st.columns(len(self.POPULAR_WORDS) + 1)
        for column, (label, query) in zip(columns, self.POPULAR_WORDS):
            with column:
                if st.button(label):
                    self.query = query

    @staticmethod
    def _rank_by_stars(search_results):
        """Decode every hit once and order the records by star count.

        Args:
            search_results: Iterable of hits whose ``raw`` attribute holds the
                stored document as a JSON string.

        Returns:
            A list of the decoded documents, highest ``stargazers_count``
            first. Records with equal counts keep the order in which the
            searcher returned them. A missing or ``null`` count is ranked as
            0 so that one incomplete record cannot break the whole page.
        """
        records = [json.loads(hit.raw) for hit in search_results]
        records.sort(
            key=lambda record: record.get("stargazers_count") or 0,
            reverse=True,
        )
        return records

    def show_search_results(self):
        """Run the current query and render the hits, most-starred first.

        Nothing is rendered when the query is empty and the search button was
        not pressed.
        """
        if not (self.query or self.search_button):
            return

        st.write("#")

        search_results = self.searcher.search(self.query, k=self.MAX_RESULTS)
        st.write(f"{len(search_results)} results")

        for record in self._rank_by_stars(search_results):
            # Read every field up front so that a malformed record fails
            # before anything is rendered for it.
            description = record["description"]
            url = record["url"]
            project_name = record["project_name"]
            main_topic = record["main_topic"]
            sub_topic = record["sub_topic"]

            st.write("---")
            st.subheader(f"[{project_name}]({url})")
            st.markdown(description)
            if sub_topic is None:
                st.caption(f"{main_topic}")
            else:
                st.caption(f"{main_topic} / {sub_topic}")
            st.write("#")
|
|
|
|
|
def main() -> None:
    """Render the search page; constructing the application draws the UI."""
    SearchApplication()


# Build the page only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
|