add first and latest commit dates to table
Browse files- app.py +86 -17
- awesome-japanese-nlp-resources-search.json +0 -0
app.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import json
|
|
|
2 |
|
3 |
import gradio as gr
|
4 |
import pandas as pd
|
@@ -10,6 +11,13 @@ def read_json(file_name):
|
|
10 |
return json_data
|
11 |
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
json_file = "awesome-japanese-nlp-resources-search.json"
|
14 |
json_data = read_json(json_file)
|
15 |
data = {
|
@@ -17,6 +25,8 @@ data = {
|
|
17 |
"downloads": [],
|
18 |
"stars": [],
|
19 |
"description": [],
|
|
|
|
|
20 |
"source": [],
|
21 |
"languages": [],
|
22 |
"type": [],
|
@@ -29,6 +39,15 @@ for data_json in json_data:
|
|
29 |
source = data_json["source"]
|
30 |
languages = data_json["languages"]
|
31 |
repo_type = data_json["model_or_dataset"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
if "stargazers_count" in data_json:
|
34 |
data["stars"].append(data_json["stargazers_count"])
|
@@ -40,17 +59,19 @@ for data_json in json_data:
|
|
40 |
else:
|
41 |
data["downloads"].append(None)
|
42 |
|
43 |
-
data["project_name"].append(f"[{project_name}]({url})")
|
44 |
data["source"].append(source)
|
45 |
data["description"].append(description)
|
46 |
data["languages"].append(languages)
|
47 |
data["type"].append(repo_type)
|
|
|
|
|
48 |
|
49 |
data = pd.DataFrame(data)
|
50 |
|
51 |
|
52 |
def show_search_results(
|
53 |
-
language_filter, queries,
|
54 |
):
|
55 |
queries = queries.lower()
|
56 |
queries = queries.split()
|
@@ -67,12 +88,7 @@ def show_search_results(
|
|
67 |
)
|
68 |
df_search = df_search[matches]
|
69 |
|
70 |
-
|
71 |
-
df_search = df_search[df_search["type"] == "dataset"]
|
72 |
-
|
73 |
-
if "Model" in type_checkbox:
|
74 |
-
df_search = df_search[df_search["type"] == "model"]
|
75 |
-
|
76 |
if "GitHub" not in source_checkbox:
|
77 |
df_search = df_search[df_search["source"] != "GitHub"]
|
78 |
df_search = df_search.drop("stars", axis=1)
|
@@ -81,6 +97,40 @@ def show_search_results(
|
|
81 |
df_search = df_search[df_search["source"] != "Hugging Face"]
|
82 |
df_search = df_search.drop("downloads", axis=1)
|
83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
for query in queries:
|
85 |
contained_description = data["description"].str.contains(query)
|
86 |
|
@@ -120,14 +170,30 @@ with gr.Blocks() as demo:
|
|
120 |
)
|
121 |
|
122 |
source_checkbox = gr.CheckboxGroup(
|
123 |
-
["GitHub", "Hugging Face"],
|
124 |
value=["GitHub", "Hugging Face"],
|
125 |
label="Source",
|
126 |
)
|
127 |
|
128 |
-
|
129 |
-
[
|
130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
)
|
132 |
|
133 |
df = gr.DataFrame(
|
@@ -142,38 +208,41 @@ with gr.Blocks() as demo:
|
|
142 |
inputs=[
|
143 |
language_selector,
|
144 |
query,
|
145 |
-
type_checkbox,
|
146 |
source_checkbox,
|
|
|
147 |
],
|
148 |
outputs=df,
|
149 |
)
|
|
|
150 |
language_selector.change(
|
151 |
fn=show_search_results,
|
152 |
inputs=[
|
153 |
language_selector,
|
154 |
query,
|
155 |
-
type_checkbox,
|
156 |
source_checkbox,
|
|
|
157 |
],
|
158 |
outputs=df,
|
159 |
)
|
|
|
160 |
source_checkbox.change(
|
161 |
fn=show_search_results,
|
162 |
inputs=[
|
163 |
language_selector,
|
164 |
query,
|
165 |
-
type_checkbox,
|
166 |
source_checkbox,
|
|
|
167 |
],
|
168 |
outputs=df,
|
169 |
)
|
170 |
-
|
|
|
171 |
fn=show_search_results,
|
172 |
inputs=[
|
173 |
language_selector,
|
174 |
query,
|
175 |
-
type_checkbox,
|
176 |
source_checkbox,
|
|
|
177 |
],
|
178 |
outputs=df,
|
179 |
)
|
|
|
1 |
import json
|
2 |
+
from datetime import datetime
|
3 |
|
4 |
import gradio as gr
|
5 |
import pandas as pd
|
|
|
11 |
return json_data
|
12 |
|
13 |
|
14 |
+
def truncate_text(text, max_length=40):
    """Shorten ``text`` to at most ``max_length`` characters.

    Text that already fits is returned unchanged. Longer text is cut to
    ``max_length - 1`` characters and a single "…" is appended, so the
    result is exactly ``max_length`` characters long.

    Args:
        text: String to shorten.
        max_length: Maximum length of the returned string (default 40).

    Returns:
        ``text`` itself when it fits, otherwise the truncated text ending
        in "…". An empty string when ``max_length`` is zero or negative.
    """
    if len(text) <= max_length:
        return text
    if max_length <= 0:
        # A negative slice bound would count from the end of the string and
        # yield a result longer than the requested limit.
        return ""
    return text[: max_length - 1] + "…"
|
19 |
+
|
20 |
+
|
21 |
json_file = "awesome-japanese-nlp-resources-search.json"
|
22 |
json_data = read_json(json_file)
|
23 |
data = {
|
|
|
25 |
"downloads": [],
|
26 |
"stars": [],
|
27 |
"description": [],
|
28 |
+
"first_commit": [],
|
29 |
+
"latest_commit": [],
|
30 |
"source": [],
|
31 |
"languages": [],
|
32 |
"type": [],
|
|
|
39 |
source = data_json["source"]
|
40 |
languages = data_json["languages"]
|
41 |
repo_type = data_json["model_or_dataset"]
|
42 |
+
first_commit = data_json["first_commit"]
|
43 |
+
if first_commit:
|
44 |
+
first_commit = datetime.strptime(first_commit, "%Y-%m-%d %H:%M:%S")
|
45 |
+
first_commit = first_commit.date()
|
46 |
+
|
47 |
+
latest_commit = data_json["latest_commit"]
|
48 |
+
if latest_commit:
|
49 |
+
latest_commit = datetime.strptime(latest_commit, "%Y-%m-%d %H:%M:%S")
|
50 |
+
latest_commit = latest_commit.date()
|
51 |
|
52 |
if "stargazers_count" in data_json:
|
53 |
data["stars"].append(data_json["stargazers_count"])
|
|
|
59 |
else:
|
60 |
data["downloads"].append(None)
|
61 |
|
62 |
+
data["project_name"].append(f"[{truncate_text(project_name)}]({url})")
|
63 |
data["source"].append(source)
|
64 |
data["description"].append(description)
|
65 |
data["languages"].append(languages)
|
66 |
data["type"].append(repo_type)
|
67 |
+
data["first_commit"].append(first_commit)
|
68 |
+
data["latest_commit"].append(latest_commit)
|
69 |
|
70 |
data = pd.DataFrame(data)
|
71 |
|
72 |
|
73 |
def show_search_results(
|
74 |
+
language_filter, queries, source_checkbox, show_checkbox
|
75 |
):
|
76 |
queries = queries.lower()
|
77 |
queries = queries.split()
|
|
|
88 |
)
|
89 |
df_search = df_search[matches]
|
90 |
|
91 |
+
# source_checkbox
|
|
|
|
|
|
|
|
|
|
|
92 |
if "GitHub" not in source_checkbox:
|
93 |
df_search = df_search[df_search["source"] != "GitHub"]
|
94 |
df_search = df_search.drop("stars", axis=1)
|
|
|
97 |
df_search = df_search[df_search["source"] != "Hugging Face"]
|
98 |
df_search = df_search.drop("downloads", axis=1)
|
99 |
|
100 |
+
if "Dataset" in source_checkbox:
|
101 |
+
df_search = df_search[df_search["type"] == "dataset"]
|
102 |
+
|
103 |
+
if "Model" in source_checkbox:
|
104 |
+
df_search = df_search[df_search["type"] == "model"]
|
105 |
+
|
106 |
+
# show_checkbox
|
107 |
+
if "project_name" not in show_checkbox:
|
108 |
+
df_search = df_search.drop("project_name", axis=1)
|
109 |
+
|
110 |
+
if "downloads" not in show_checkbox:
|
111 |
+
df_search = df_search.drop("downloads", axis=1)
|
112 |
+
|
113 |
+
if "stars" not in show_checkbox:
|
114 |
+
df_search = df_search.drop("stars", axis=1)
|
115 |
+
|
116 |
+
if "first_commit" not in show_checkbox:
|
117 |
+
df_search = df_search.drop("first_commit", axis=1)
|
118 |
+
|
119 |
+
if "latest_commit" not in show_checkbox:
|
120 |
+
df_search = df_search.drop("latest_commit", axis=1)
|
121 |
+
|
122 |
+
if "description" not in show_checkbox:
|
123 |
+
df_search = df_search.drop("description", axis=1)
|
124 |
+
|
125 |
+
if "source" not in show_checkbox:
|
126 |
+
df_search = df_search.drop("source", axis=1)
|
127 |
+
|
128 |
+
if "languages" not in show_checkbox:
|
129 |
+
df_search = df_search.drop("languages", axis=1)
|
130 |
+
|
131 |
+
if "type" not in show_checkbox:
|
132 |
+
df_search = df_search.drop("type", axis=1)
|
133 |
+
|
134 |
for query in queries:
|
135 |
contained_description = data["description"].str.contains(query)
|
136 |
|
|
|
170 |
)
|
171 |
|
172 |
source_checkbox = gr.CheckboxGroup(
|
173 |
+
["GitHub", "Hugging Face", "Dataset", "Model"],
|
174 |
value=["GitHub", "Hugging Face"],
|
175 |
label="Source",
|
176 |
)
|
177 |
|
178 |
+
show_checkbox = gr.CheckboxGroup(
|
179 |
+
[
|
180 |
+
"project_name",
|
181 |
+
"downloads",
|
182 |
+
"stars",
|
183 |
+
"description",
|
184 |
+
"first_commit",
|
185 |
+
"latest_commit",
|
186 |
+
"source",
|
187 |
+
"type",
|
188 |
+
"languages",
|
189 |
+
],
|
190 |
+
value=[
|
191 |
+
"project_name",
|
192 |
+
"downloads",
|
193 |
+
"stars",
|
194 |
+
"description",
|
195 |
+
],
|
196 |
+
label="Display columns in a table",
|
197 |
)
|
198 |
|
199 |
df = gr.DataFrame(
|
|
|
208 |
inputs=[
|
209 |
language_selector,
|
210 |
query,
|
|
|
211 |
source_checkbox,
|
212 |
+
show_checkbox,
|
213 |
],
|
214 |
outputs=df,
|
215 |
)
|
216 |
+
|
217 |
language_selector.change(
|
218 |
fn=show_search_results,
|
219 |
inputs=[
|
220 |
language_selector,
|
221 |
query,
|
|
|
222 |
source_checkbox,
|
223 |
+
show_checkbox,
|
224 |
],
|
225 |
outputs=df,
|
226 |
)
|
227 |
+
|
228 |
source_checkbox.change(
|
229 |
fn=show_search_results,
|
230 |
inputs=[
|
231 |
language_selector,
|
232 |
query,
|
|
|
233 |
source_checkbox,
|
234 |
+
show_checkbox,
|
235 |
],
|
236 |
outputs=df,
|
237 |
)
|
238 |
+
|
239 |
+
show_checkbox.change(
|
240 |
fn=show_search_results,
|
241 |
inputs=[
|
242 |
language_selector,
|
243 |
query,
|
|
|
244 |
source_checkbox,
|
245 |
+
show_checkbox,
|
246 |
],
|
247 |
outputs=df,
|
248 |
)
|
awesome-japanese-nlp-resources-search.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|