Spaces:

taishi-i
/

awesome-japanese-nlp-resources-search

Running

File size: 2,947 Bytes

import json

import streamlit as st
from pyserini.search.lucene import LuceneSearcher


class SearchApplication:
    """Streamlit page that searches a Lucene index of Japanese NLP projects.

    Instantiating the class renders the whole page: it configures the page,
    opens the index, draws the query box, the search button and the
    "popular words" shortcuts, and finally lists the matching projects.

    Attributes:
        title: Page title, also used as the header text.
        searcher: ``LuceneSearcher`` opened on ``indexes/docs``.
        query: Text to search for; taken from the text box, or overridden
            by a popular-word button.
        search_button: ``True`` on the run in which the 🔎 button was pressed.
    """

    # (button label, query issued when that button is pressed)
    _POPULAR_WORDS = (
        ("Python", "Python"),
        ("ChatGPT", "ChatGPT"),
        ("辞書", "辞書"),
        # NOTE(review): the label is Japanese but the query sent is the
        # English word. Kept as in the original; confirm this is intended.
        ("コーパス", "Corpus"),
    )

    def __init__(self):
        """Render the page top to bottom."""
        self.title = "Awesome Japanese NLP resources search"

        # Page config is applied before any other element is drawn.
        self.set_page_config()
        self.searcher = self.set_searcher()

        st.header(self.title)
        col1, col2 = st.columns(2)
        with col1:
            self.query = st.text_input(
                "Search English or Japanese words", value=""
            )

        with col2:
            # Blank heading used as a vertical spacer so the button lines up
            # with the text box.
            st.write("#")
            self.search_button = st.button("🔎")

        st.caption(
            "You can search for open-source software from [400+ Japanese NLP"
            " repositories](https://github.com/taishi-i/awesome-japanese-nlp-resources)."
        )
        st.write("#")

        # Popular-word buttons may overwrite self.query, so they must be
        # processed before the results are rendered.
        self.show_popular_words()
        self.show_search_results()

    def set_page_config(self):
        """Set the browser tab title, icon and page layout."""
        st.set_page_config(
            page_title=self.title,
            page_icon="😎",
            layout="centered",
        )

    def set_searcher(self):
        """Open the index at ``indexes/docs`` and set its language to "ja".

        Returns:
            The configured ``LuceneSearcher``.
        """
        # NOTE(review): this runs every time the class is instantiated;
        # consider caching the searcher if opening the index is slow.
        searcher = LuceneSearcher("indexes/docs")
        searcher.set_language("ja")
        return searcher

    def show_popular_words(self):
        """Draw the shortcut buttons; a pressed one replaces ``self.query``."""
        st.caption("Popular words")

        # Five columns are created but only the first four hold a button;
        # zip() stops at the shorter sequence, leaving the last column empty.
        columns = st.columns(5)
        for column, (label, query) in zip(columns, self._POPULAR_WORDS):
            with column:
                if st.button(label):
                    self.query = query

    @staticmethod
    def _rank_by_stars(hits):
        """Decode each hit's raw JSON once and order records by star count.

        Args:
            hits: Iterable of objects exposing the stored JSON document as
                a string in ``.raw``.

        Returns:
            A list of decoded dicts, highest ``stargazers_count`` first.
            Records whose count is missing or null are treated as 0 so one
            incomplete document cannot break the whole page. Ties keep the
            order in which the hits were given.
        """
        records = [json.loads(hit.raw) for hit in hits]
        records.sort(
            key=lambda record: record.get("stargazers_count") or 0,
            reverse=True,
        )
        return records

    def show_search_results(self):
        """Run the search and list the matching projects.

        Nothing is drawn unless there is a query or the 🔎 button was
        pressed. A blank query skips the index lookup and reports
        "0 results".
        """
        if not (self.query or self.search_button):
            return

        st.write("#")

        if self.query.strip():
            hits = self.searcher.search(self.query, k=500)
        else:
            # Button pressed with an empty box: nothing to look up.
            hits = []
        st.write(f"{len(hits)} results")

        for record in self._rank_by_stars(hits):
            description = record["description"]
            url = record["url"]
            project_name = record["project_name"]
            main_topic = record["main_topic"]
            sub_topic = record["sub_topic"]

            st.subheader(f"[{project_name}]({url})")
            st.markdown(description)
            if sub_topic is None:
                st.caption(f"{main_topic}")
            else:
                st.caption(f"{main_topic} / {sub_topic}")
            st.write("#")


def main():
    """Build the search page by instantiating the application."""
    # The constructor does all the rendering; the instance is not kept.
    SearchApplication()


# Render the page only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()