File size: 2,947 Bytes
c5b16f9
 
 
 
 
94349ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338536e
94349ba
338536e
94349ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
05a0271
 
 
 
 
94349ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import json

import streamlit as st
from pyserini.search.lucene import LuceneSearcher


class SearchApplication:
    """Streamlit page for searching an index of Japanese NLP repositories.

    Instantiating the class renders the whole page: page configuration,
    header, query box, search button, popular-word shortcuts and, when a
    query is present or the search button was clicked, the matching
    repositories ordered by star count.
    """

    # Path of the Lucene index opened by ``set_searcher``.
    INDEX_DIR = "indexes/docs"
    # Language handed to ``LuceneSearcher.set_language``.
    LANGUAGE = "ja"
    # Maximum number of hits requested from the searcher per query.
    MAX_RESULTS = 500
    # (button label, query issued when that button is clicked).
    # NOTE(review): the last shortcut is labelled in Japanese but searches
    # the English word; this mapping is kept exactly as it was.
    POPULAR_WORDS = (
        ("Python", "Python"),
        ("ChatGPT", "ChatGPT"),
        ("辞書", "辞書"),
        ("コーパス", "Corpus"),
    )

    def __init__(self):
        """Render the page top to bottom."""
        self.title = "Awesome Japanese NLP resources search"

        # Page configuration is applied before any other Streamlit element
        # is created.
        self.set_page_config()
        self.searcher = self.set_searcher()

        st.header(self.title)
        col1, col2 = st.columns(2)
        with col1:
            self.query = st.text_input(
                "Search English or Japanese words", value=""
            )

        with col2:
            # The "#" line is used as a vertical spacer so the button sits
            # roughly level with the text input.
            st.write("#")
            self.search_button = st.button("🔎")

        st.caption(
            "You can search for open-source software from [400+ Japanese NLP"
            " repositories](https://github.com/taishi-i/awesome-japanese-nlp-resources)."
        )
        st.write("#")

        # A clicked shortcut overwrites ``self.query`` before the results
        # are rendered, so the order of these two calls matters.
        self.show_popular_words()
        self.show_search_results()

    def set_page_config(self):
        """Set the browser tab title, icon and layout of the page."""
        st.set_page_config(
            page_title=self.title,
            page_icon="😎",
            layout="centered",
        )

    def set_searcher(self):
        """Open the Lucene index and return a searcher for it.

        NOTE(review): Streamlit re-executes the script on every
        interaction, so the index is presumably reopened each time;
        consider caching the searcher if startup becomes noticeable.
        """
        searcher = LuceneSearcher(self.INDEX_DIR)
        searcher.set_language(self.LANGUAGE)
        return searcher

    def show_popular_words(self):
        """Render shortcut buttons that replace the current query."""
        st.caption("Popular words")

        # Five columns are created but only the first four hold a button;
        # ``zip`` stops at the end of ``POPULAR_WORDS``.
        columns = st.columns(5)
        for column, (label, query) in zip(columns, self.POPULAR_WORDS):
            with column:
                if st.button(label):
                    self.query = query

    @staticmethod
    def _rank_by_stars(search_results):
        """Decode each hit's raw JSON once and sort by stars, descending.

        Args:
            search_results: iterable of hits exposing a ``raw`` attribute
                holding a JSON document with a ``stargazers_count`` key.

        Returns:
            A list of decoded dicts. Hits with equal star counts keep the
            order in which the searcher returned them.
        """
        records = [json.loads(hit.raw) for hit in search_results]
        records.sort(
            key=lambda record: record["stargazers_count"], reverse=True
        )
        return records

    @staticmethod
    def _topic_caption(main_topic, sub_topic):
        """Return the topic line, omitting the sub topic when it is None."""
        if sub_topic is None:
            return f"{main_topic}"
        return f"{main_topic} / {sub_topic}"

    def show_search_results(self):
        """Run the query and render the matching repositories.

        Nothing is rendered unless there is a query or the search button
        was clicked.
        """
        if not (self.query or self.search_button):
            return

        st.write("#")

        search_results = self.searcher.search(
            self.query, k=self.MAX_RESULTS
        )
        st.write(f"{len(search_results)} results")

        for record in self._rank_by_stars(search_results):
            description = record["description"]
            url = record["url"]
            project_name = record["project_name"]
            main_topic = record["main_topic"]
            sub_topic = record["sub_topic"]

            st.subheader(f"[{project_name}]({url})")
            # Assumes the description can be null like ``sub_topic``
            # (TODO confirm against the index); skip it rather than
            # render the literal text "None".
            if description is not None:
                st.markdown(description)
            st.caption(self._topic_caption(main_topic, sub_topic))
            st.write("#")


def main() -> None:
    """Render the search page.

    The page is drawn as a side effect of constructing
    ``SearchApplication``; the instance is not used afterwards.
    """
    _ = SearchApplication()


# Build the page only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()