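# Page header captured from the Hugging Face file viewer, kept as a comment
# so it does not break the module:
#   last commit ed8d73f by taishi-i: "add source search function"
#   file size: 3.2 kB; security scan: no virus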
import json
import gradio as gr
import pandas as pd
def read_json(file_name):
    """Load a JSON document from disk and return the parsed object.

    Args:
        file_name: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file (list, dict, ...).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8: without it, open() falls back to the
    # platform locale encoding, which mangles or rejects non-ASCII text
    # (e.g. Japanese descriptions) on systems whose default is not UTF-8.
    with open(file_name, "r", encoding="utf-8") as f:
        return json.load(f)
# Load the resource catalogue that the search UI operates on.
json_file = "awesome-japanese-nlp-resources-search.json"
json_data = read_json(json_file)

# Build one table row per catalogue entry:
#   project_name -> Markdown link "[name](url)"
#   source       -> copied from the entry
#   description  -> copied from the entry
#   languages    -> the entry's language list joined with ", "
data = pd.DataFrame(
    {
        "project_name": [
            f"[{entry['project_name']}]({entry['url']})" for entry in json_data
        ],
        "source": [entry["source"] for entry in json_data],
        "description": [entry["description"] for entry in json_data],
        "languages": [", ".join(entry["languages"]) for entry in json_data],
    }
)
def show_search_results(language_filter, queries, source_checkbox):
    """Filter the module-level ``data`` table for display in the UI.

    Args:
        language_filter: Language name to keep (e.g. ``"C++"``); a falsy
            value disables language filtering.
        queries: Free-text search string. It is split on whitespace and a
            row must contain every term to be kept.
        source_checkbox: Collection of enabled source names. Rows whose
            ``source`` is ``"GitHub"`` or ``"Hugging Face"`` are dropped
            when that name is absent from the collection.

    Returns:
        A DataFrame holding the rows of ``data`` that pass every filter.
    """
    terms = (queries or "").split()
    enabled_sources = source_checkbox or []
    df_search = data

    if language_filter:
        wanted = language_filter.strip().lower()

        def has_language(cell):
            # "languages" holds names joined with ", ". Compare whole names
            # rather than using a regex: names such as "C++" and "C#" contain
            # regex metacharacters / non-word characters, and "C" must not
            # match "C++" or "C#".
            return isinstance(cell, str) and wanted in (
                name.strip().lower() for name in cell.split(",")
            )

        language_mask = df_search["languages"].map(has_language).astype(bool)
        df_search = df_search[language_mask]

    # Only these two sources are toggled by the checkbox group; rows with any
    # other source value are always kept.
    for source_name in ("GitHub", "Hugging Face"):
        if source_name not in enabled_sources:
            df_search = df_search[df_search["source"] != source_name]

    for term in terms:
        # Literal, case-insensitive substring match. The masks are computed on
        # the already-filtered frame so their index lines up with df_search,
        # and na=False treats missing text as "no match".
        in_description = df_search["description"].str.contains(
            term, case=False, regex=False, na=False
        )
        in_project_name = df_search["project_name"].str.contains(
            term, case=False, regex=False, na=False
        )
        df_search = df_search[in_description | in_project_name]

    return df_search
# Assemble the search page: a query box, a language dropdown, a source
# checkbox group and the results table, then start the app.
with gr.Blocks() as demo:
    # The Markdown text is kept flush-left so the "#" heading line is not
    # indented inside the string.
    gr.Markdown(
        """
# Awesome Japanese NLP resources search 🔎
You can search for open-source software from [1250+ Japanese NLP repositories](https://github.com/taishi-i/awesome-japanese-nlp-resources).
"""
    )

    query = gr.Textbox(
        label="Search English or Japanese words", placeholder="llm"
    )

    # Choices offered by the "Programming Language" dropdown.
    languages = [
        "Python",
        "Jupyter Notebook",
        "Java",
        "C++",
        "JavaScript",
        "TypeScript",
        "C#",
        "Rust",
        "Go",
        "C",
        "Kotlin",
        "Ruby",
        "Perl",
        "Lua",
        "PHP",
        "Julia",
        "R",
        "Swift",
        "Haskell",
        "Scala",
    ]

    language_selector = gr.Dropdown(
        label="Programming Language",
        choices=languages,
    )

    # Both sources start out enabled.
    source_checkbox = gr.CheckboxGroup(
        ["GitHub", "Hugging Face"],
        value=["GitHub", "Hugging Face"],
        label="Source",
    )

    # Results table, initially showing the full `data` frame; the
    # project_name column holds Markdown links.
    df = gr.DataFrame(
        value=data, type="pandas", datatype="markdown", height=1000
    )

    # Every control re-runs the same search with the same three inputs, so
    # the change handlers are registered in one loop instead of three copies.
    search_inputs = [language_selector, query, source_checkbox]
    for component in (query, language_selector, source_checkbox):
        component.change(
            fn=show_search_results,
            inputs=search_inputs,
            outputs=df,
        )

demo.launch()