File size: 3,198 Bytes
c5b16f9 1864742 c5b16f9 94349ba 650b6f6 94349ba 650b6f6 2fcc9e9 650b6f6 2fcc9e9 650b6f6 2fcc9e9 94349ba 650b6f6 94349ba ed8d73f 2759288 2fcc9e9 ed8d73f 2759288 2fcc9e9 2759288 650b6f6 94349ba 1864742 7ce3b6b 1864742 94349ba 1864742 2fcc9e9 ed8d73f 41ccd9e 94349ba 2fcc9e9 ed8d73f 2fcc9e9 ed8d73f 2fcc9e9 94349ba 1864742 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
import json
import re

import gradio as gr
import pandas as pd
def read_json(file_name):
    """Load and return the parsed contents of a JSON file.

    Parameters
    ----------
    file_name : str
        Path to the JSON file to read.

    Returns
    -------
    object
        The deserialized JSON payload (typically a list or dict).

    Raises
    ------
    FileNotFoundError
        If *file_name* does not exist.
    json.JSONDecodeError
        If the file is not valid JSON.
    """
    # Explicit UTF-8: the dataset contains Japanese text, and the platform
    # default encoding (e.g. cp932 on Windows) would corrupt or reject it.
    with open(file_name, "r", encoding="utf-8") as f:
        return json.load(f)
# Load the resource catalogue and flatten it into a DataFrame with one
# markdown-linked project name per row.
json_file = "awesome-japanese-nlp-resources-search.json"
json_data = read_json(json_file)

data = pd.DataFrame(
    {
        "project_name": [
            f"[{entry['project_name']}]({entry['url']})" for entry in json_data
        ],
        "source": [entry["source"] for entry in json_data],
        "description": [entry["description"] for entry in json_data],
        "languages": [", ".join(entry["languages"]) for entry in json_data],
    }
)
def show_search_results(language_filter, queries, source_checkbox, df=None):
    """Filter the resource table by language, free-text query, and source.

    Parameters
    ----------
    language_filter : str or None
        Programming language that must appear as one of the comma-separated
        entries in the ``languages`` column (matched case-insensitively).
    queries : str
        Whitespace-separated search terms; every term must occur (as a
        literal substring, case-insensitively) in the description or the
        project name.
    source_checkbox : list[str]
        Subset of ``{"GitHub", "Hugging Face"}`` to keep.
    df : pandas.DataFrame, optional
        Table to search. Defaults to the module-level ``data`` frame, so
        existing callers are unaffected.

    Returns
    -------
    pandas.DataFrame
        Rows matching all active filters.
    """
    df_search = data if df is None else df
    if language_filter:
        # re.escape keeps languages such as "C++" or "C#" from being read
        # as (invalid) regex syntax — the unescaped original raised
        # re.error("multiple repeat") for "C++". Anchoring on the ", "
        # separators matches whole entries only, so "C" no longer matches
        # "C++" or "C#".
        pattern = r"(?:^|, )" + re.escape(language_filter) + r"(?:, |$)"
        df_search = df_search[
            df_search["languages"].str.contains(
                pattern, case=False, regex=True, na=False
            )
        ]
    if "GitHub" not in source_checkbox:
        df_search = df_search[df_search["source"] != "GitHub"]
    if "Hugging Face" not in source_checkbox:
        df_search = df_search[df_search["source"] != "Hugging Face"]
    for term in queries.lower().split():
        # Build the masks from the already-filtered frame (the original
        # used the full `data` table and relied on index alignment), match
        # case-insensitively (the lowercased term previously never matched
        # "LLM" etc.), and treat the term as a literal, not a regex.
        in_description = df_search["description"].str.contains(
            term, case=False, regex=False, na=False
        )
        in_project_name = df_search["project_name"].str.contains(
            term, case=False, regex=False, na=False
        )
        df_search = df_search[in_description | in_project_name]
    return df_search
# Gradio UI: a free-text search box, a programming-language dropdown, and
# source checkboxes, all wired to the same search callback that refreshes
# the results table.
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Awesome Japanese NLP resources search 🔎
        You can search for open-source software from [1250+ Japanese NLP repositories](https://github.com/taishi-i/awesome-japanese-nlp-resources).
        """
    )
    query = gr.Textbox(
        label="Search English or Japanese words", placeholder="llm"
    )
    # Languages offered in the dropdown filter.
    languages = [
        "Python", "Jupyter Notebook", "Java", "C++", "JavaScript",
        "TypeScript", "C#", "Rust", "Go", "C", "Kotlin", "Ruby",
        "Perl", "Lua", "PHP", "Julia", "R", "Swift", "Haskell", "Scala",
    ]
    language_selector = gr.Dropdown(
        label="Programming Language",
        choices=languages,
    )
    source_checkbox = gr.CheckboxGroup(
        ["GitHub", "Hugging Face"],
        value=["GitHub", "Hugging Face"],
        label="Source",
    )
    df = gr.DataFrame(
        value=data, type="pandas", datatype="markdown", height=1000
    )
    # Every input widget triggers the same search with the same inputs, so
    # register the callback in one loop instead of three copies.
    for widget in (query, language_selector, source_checkbox):
        widget.change(
            fn=show_search_results,
            inputs=[language_selector, query, source_checkbox],
            outputs=df,
        )
demo.launch()
|