taishi-i's picture
add first and latest commit dates to table
6d0a7aa
raw
history blame
6.44 kB
import json
from datetime import datetime
import gradio as gr
import pandas as pd
def read_json(file_name):
    """Load and return the parsed contents of a JSON file.

    Args:
        file_name: Path of the JSON file to read.

    Returns:
        The decoded JSON document, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8: without it, open() uses the locale's
    # preferred encoding, which corrupts or rejects non-ASCII (e.g. Japanese)
    # text on platforms whose default is not UTF-8.
    with open(file_name, "r", encoding="utf-8") as f:
        return json.load(f)
def truncate_text(text, max_length=40):
    """Shorten overly long text and mark the cut with an ellipsis.

    Text whose length does not exceed ``max_length`` is returned unchanged.
    Otherwise the first ``max_length - 1`` characters are kept and a single
    "…" character is appended.
    """
    fits = len(text) <= max_length
    return text if fits else text[: max_length - 1] + "…"
json_file = "awesome-japanese-nlp-resources-search.json"
json_data = read_json(json_file)


def _to_date(timestamp):
    """Convert a "%Y-%m-%d %H:%M:%S" string to a date; pass falsy values through."""
    if not timestamp:
        return timestamp
    return datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S").date()


# Column-oriented accumulator: one list per table column, filled row by row
# and converted to a DataFrame once every entry has been processed.
data = {
    "project_name": [],
    "downloads": [],
    "stars": [],
    "description": [],
    "first_commit": [],
    "latest_commit": [],
    "source": [],
    "languages": [],
    "type": [],
}

for entry in json_data:
    # The project name is stored as a markdown link to the entry's URL.
    short_name = truncate_text(entry["project_name"])
    row = {
        "project_name": f"[{short_name}]({entry['url']})",
        # "downloads" / "stargazers_count" are optional keys; None marks absence.
        "downloads": entry.get("downloads"),
        "stars": entry.get("stargazers_count"),
        # Descriptions are lower-cased once here.
        "description": entry["description"].lower(),
        "first_commit": _to_date(entry["first_commit"]),
        "latest_commit": _to_date(entry["latest_commit"]),
        "source": entry["source"],
        "languages": entry["languages"],
        "type": entry["model_or_dataset"],
    }
    for column, value in row.items():
        data[column].append(value)

data = pd.DataFrame(data)
def show_search_results(
    language_filter, queries, source_checkbox, show_checkbox
):
    """Filter the resource table and select the columns to display.

    Args:
        language_filter: Programming language a row must list in its
            "languages" value; a falsy value disables this filter.
        queries: Whitespace-separated search words. A row is kept only if
            every word occurs, case-insensitively and as plain text, in its
            description or in its project name.
        source_checkbox: Selected labels among "GitHub", "Hugging Face",
            "Dataset" and "Model".
        show_checkbox: Names of the columns to show.

    Returns:
        A pandas DataFrame holding the matching rows and visible columns,
        in the column order of the module-level ``data`` table.
    """
    source_checkbox = source_checkbox or []
    show_checkbox = show_checkbox or []
    df_search = data

    if language_filter:

        def contains_language(language_list):
            # Rows without language information (None / NaN) cannot match;
            # testing membership on them would raise TypeError.
            if not isinstance(language_list, (list, tuple, set, str)):
                return False
            return language_filter in language_list

        # astype(bool) keeps the mask boolean even when the frame is empty.
        matches = df_search["languages"].apply(contains_language).astype(bool)
        df_search = df_search[matches]

    # Source filters. Excluding a source also hides the metric column that
    # only that source provides; the column is recorded here and removed once
    # at the end, so a column hidden twice can no longer raise KeyError.
    hidden_columns = set()
    if "GitHub" not in source_checkbox:
        df_search = df_search[df_search["source"] != "GitHub"]
        hidden_columns.add("stars")
    if "Hugging Face" not in source_checkbox:
        df_search = df_search[df_search["source"] != "Hugging Face"]
        hidden_columns.add("downloads")

    # Type filters. Ticking both "Dataset" and "Model" keeps rows of either
    # type instead of applying two mutually exclusive filters in sequence.
    wanted_types = [
        repo_type
        for label, repo_type in (("Dataset", "dataset"), ("Model", "model"))
        if label in source_checkbox
    ]
    if wanted_types:
        df_search = df_search[df_search["type"].isin(wanted_types)]

    # Text search runs on the filtered frame itself, before any column is
    # removed, so the masks always share its index. Words are matched
    # literally (regex=False) so input such as "c++" or "(" cannot raise a
    # regex error, and case-insensitively so mixed-case project names match.
    for query in (queries or "").lower().split():
        in_description = df_search["description"].str.contains(
            query, case=False, regex=False, na=False
        )
        in_project_name = df_search["project_name"].str.contains(
            query, case=False, regex=False, na=False
        )
        df_search = df_search[in_description | in_project_name]

    visible_columns = [
        column
        for column in df_search.columns
        if column in show_checkbox and column not in hidden_columns
    ]
    return df_search[visible_columns]
# Build the search UI: a query box, a language dropdown, source/type filters,
# a column chooser and the result table.
with gr.Blocks() as demo:
    gr.Markdown(
        """
# Awesome Japanese NLP resources search 🔎
You can search for open-source software from [1250+ Japanese NLP repositories](https://github.com/taishi-i/awesome-japanese-nlp-resources).
"""
    )

    query = gr.Textbox(label="Search words", placeholder="llm")

    languages = [
        "Python",
        "Jupyter Notebook",
        "Java",
        "C++",
        "JavaScript",
        "TypeScript",
        "C#",
        "Rust",
        "Go",
        "C",
        "Kotlin",
        "Ruby",
        "Perl",
    ]

    language_selector = gr.Dropdown(
        label="Programming Language",
        choices=languages,
    )

    source_checkbox = gr.CheckboxGroup(
        ["GitHub", "Hugging Face", "Dataset", "Model"],
        value=["GitHub", "Hugging Face"],
        label="Source",
    )

    show_checkbox = gr.CheckboxGroup(
        [
            "project_name",
            "downloads",
            "stars",
            "description",
            "first_commit",
            "latest_commit",
            "source",
            "type",
            "languages",
        ],
        value=[
            "project_name",
            "downloads",
            "stars",
            "description",
        ],
        label="Display columns in a table",
    )

    # NOTE(review): the table starts with the full `data` frame (all columns),
    # while the column chooser starts with four columns ticked; the two only
    # agree after the first interaction. Confirm whether this is intended.
    df = gr.DataFrame(
        value=data,
        type="pandas",
        datatype="markdown",
        height=600,
    )

    # Every control re-runs the same search with the same inputs, so the
    # handler is registered in one loop instead of four hand-copied calls.
    search_inputs = [
        language_selector,
        query,
        source_checkbox,
        show_checkbox,
    ]
    for control in (query, language_selector, source_checkbox, show_checkbox):
        control.change(
            fn=show_search_results,
            inputs=search_inputs,
            outputs=df,
        )

demo.launch()