taishi-i commited on
Commit
1864742
•
1 Parent(s): be23b05

update app.py

Browse files
Files changed (2) hide show
  1. README.md +2 -2
  2. app.py +31 -95
README.md CHANGED
@@ -3,8 +3,8 @@ title: Awesome Japanese Nlp Resources Search
3
  emoji: ๐Ÿข
4
  colorFrom: gray
5
  colorTo: indigo
6
- sdk: streamlit
7
- sdk_version: 1.17.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
 
3
  emoji: ๐Ÿข
4
  colorFrom: gray
5
  colorTo: indigo
6
+ sdk: gradio
7
+ sdk_version: 4.36.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
app.py CHANGED
@@ -1,110 +1,46 @@
1
  import json
2
 
3
- import streamlit as st
 
4
  from pyserini.search.lucene import LuceneSearcher
5
 
 
 
6
 
7
- class SearchApplication:
8
- def __init__(self):
9
- self.title = "Awesome Japanese NLP resources search"
10
 
11
- self.set_page_config()
12
- self.searcher = self.set_searcher()
13
 
14
- st.header(self.title)
15
- col1, col2 = st.columns(2)
16
- with col1:
17
- self.query = st.text_input(
18
- "Search English or Japanese words", value=""
19
- )
20
 
21
- with col2:
22
- st.write("#")
23
- self.search_button = st.button("🔎")
24
 
25
- st.caption(
26
- "You can search for open-source software from [500+ Japanese NLP"
27
- " repositories](https://github.com/taishi-i/awesome-japanese-nlp-resources)."
28
- )
29
- st.write("#")
30
 
31
- self.show_popular_words()
32
- self.show_search_results()
33
 
34
- def set_page_config(self):
35
- st.set_page_config(
36
- page_title=self.title,
37
- page_icon="😎",
38
- layout="centered",
39
- )
40
 
41
- def set_searcher(self):
42
- searcher = LuceneSearcher("indexes/docs")
43
- searcher.set_language("ja")
44
- return searcher
 
 
 
45
 
46
- def show_popular_words(self):
47
- st.caption("Popular words")
 
 
48
 
49
- word1, word2, word3, word4, _ = st.columns(5)
50
- with word1:
51
- button1 = st.button("Python")
52
- if button1:
53
- self.query = "Python"
54
 
55
- with word2:
56
- button2 = st.button("ChatGPT")
57
- if button2:
58
- self.query = "ChatGPT"
59
-
60
- with word3:
61
- button3 = st.button("辞書")
62
- if button3:
63
- self.query = "辞書"
64
-
65
- with word4:
66
- button4 = st.button("コーパス")
67
- if button4:
68
- self.query = "Corpus"
69
-
70
- def show_search_results(self):
71
- if self.query or self.search_button:
72
- st.write("#")
73
-
74
- search_results = self.searcher.search(self.query, k=500)
75
- num_search_results = len(search_results)
76
- st.write(f"{num_search_results} results")
77
-
78
- for result in sorted(
79
- search_results,
80
- key=lambda x: json.loads(self.searcher.doc(x.docid).raw())[
81
- "stargazers_count"
82
- ],
83
- reverse=True,
84
- ):
85
- docid = result.docid
86
- doc = self.searcher.doc(docid)
87
- data_json = json.loads(doc.raw())
88
-
89
- description = data_json["description"]
90
- url = data_json["url"]
91
- project_name = data_json["project_name"]
92
- main_topic = data_json["main_topic"]
93
- sub_topic = data_json["sub_topic"]
94
-
95
- st.write("---")
96
- st.subheader(f"[{project_name}]({url})")
97
- st.markdown(description)
98
- if sub_topic is None:
99
- st.caption(f"{main_topic}")
100
- else:
101
- st.caption(f"{main_topic} / {sub_topic}")
102
- st.write("#")
103
-
104
-
105
- def main():
106
- SearchApplication()
107
-
108
-
109
- if __name__ == "__main__":
110
- main()
 
1
  import json
2
 
3
+ import gradio as gr
4
+ import pandas as pd
5
  from pyserini.search.lucene import LuceneSearcher
6
 
7
+ searcher = LuceneSearcher("indexes/docs")
8
+ searcher.set_language("ja")
9
 
 
 
 
10
 
11
+ def show_search_results(query):
12
+ search_results = searcher.search(query, k=100)
13
 
14
+ data = {"project_name": [], "description": []}
15
+ for result in search_results:
16
+ docid = result.docid
17
+ doc = searcher.doc(docid)
18
+ data_json = json.loads(doc.raw())
 
19
 
20
+ url = data_json["url"]
21
+ description = data_json["description"]
22
+ project_name = data_json["project_name"]
23
 
24
+ data["project_name"].append(f"[{project_name}]({url})")
25
+ data["description"].append(description)
 
 
 
26
 
27
+ data = pd.DataFrame(data)
28
+ return data
29
 
 
 
 
 
 
 
30
 
31
+ with gr.Blocks() as demo:
32
+ gr.Markdown(
33
+ """
34
+ # Awesome Japanese NLP resources search 🔎
35
+ You can search for open-source software from [500+ Japanese NLP repositories](https://github.com/taishi-i/awesome-japanese-nlp-resources).
36
+ """
37
+ )
38
 
39
+ query = gr.Textbox(
40
+ label="Search English or Japanese words", placeholder="llm"
41
+ )
42
+ df = gr.DataFrame(type="pandas", datatype="markdown", height=1000)
43
 
44
+ query.change(fn=show_search_results, inputs=query, outputs=df)
 
 
 
 
45
 
46
+ demo.launch()