"""Gradio app for searching the awesome-japanese-nlp-resources catalogue.

The catalogue is loaded once at import time from a JSON file into a pandas
DataFrame, which is then filtered on every UI change by programming language,
source and free-text keywords.
"""

import json
import re

import gradio as gr
import pandas as pd


def read_json(file_name):
    """Load and return the parsed contents of the JSON file *file_name*."""
    # JSON is UTF-8 by specification; relying on the platform default encoding
    # breaks on systems where that default cannot decode Japanese text.
    with open(file_name, "r", encoding="utf-8") as f:
        return json.load(f)


json_file = "awesome-japanese-nlp-resources-search.json"
json_data = read_json(json_file)

# One row per catalogue entry. The project name is rendered as a markdown
# link and the language list is flattened into a ", "-separated string.
data = pd.DataFrame(
    [
        {
            "project_name": f"[{entry['project_name']}]({entry['url']})",
            "source": entry["source"],
            "description": entry["description"],
            "languages": ", ".join(entry["languages"]),
        }
        for entry in json_data
    ],
    columns=["project_name", "source", "description", "languages"],
)


def show_search_results(language_filter, queries, source_checkbox):
    """Return the rows of ``data`` that satisfy every active filter.

    Args:
        language_filter: Programming language selected in the dropdown, or a
            falsy value when no language is selected.
        queries: Whitespace-separated keywords. Every keyword must occur,
            case-insensitively and as a literal substring, in the description
            or in the project-name cell (which also contains the URL).
        source_checkbox: Collection of selected sources; rows whose source is
            "GitHub" or "Hugging Face" are dropped when that source is not
            selected.

    Returns:
        A pandas DataFrame holding the matching rows.
    """
    df_search = data

    if language_filter:
        # The "languages" cell is a ", "-joined list, so anchor on the
        # separators to match whole entries only. A plain \b-delimited,
        # unescaped pattern fails for "C#" / "C++" and lets "C" match "C++".
        language_pattern = (
            r"(?:^|, )" + re.escape(language_filter) + r"(?:, |$)"
        )
        df_search = df_search[
            df_search["languages"].str.contains(
                language_pattern, case=False, regex=True, na=False
            )
        ]

    selected_sources = source_checkbox or []
    for source in ("GitHub", "Hugging Face"):
        if source not in selected_sources:
            df_search = df_search[df_search["source"] != source]

    for keyword in (queries or "").split():
        # Build the masks from the already-filtered frame so their index
        # lines up with it. regex=False keeps input such as "c++" or "("
        # from being parsed as a regular expression, and na=False treats
        # missing text as a non-match instead of producing NaN in the mask.
        in_description = df_search["description"].str.contains(
            keyword, case=False, regex=False, na=False
        )
        in_project_name = df_search["project_name"].str.contains(
            keyword, case=False, regex=False, na=False
        )
        df_search = df_search[in_description | in_project_name]

    return df_search


with gr.Blocks() as demo:
    gr.Markdown(
        """
    # Awesome Japanese NLP resources search 🔎
    You can search for open-source software from [1250+ Japanese NLP repositories](https://github.com/taishi-i/awesome-japanese-nlp-resources).
    """
    )

    query = gr.Textbox(
        label="Search English or Japanese words", placeholder="llm"
    )

    languages = [
        "Python",
        "Jupyter Notebook",
        "Java",
        "C++",
        "JavaScript",
        "TypeScript",
        "C#",
        "Rust",
        "Go",
        "C",
        "Kotlin",
        "Ruby",
        "Perl",
        "Lua",
        "PHP",
        "Julia",
        "R",
        "Swift",
        "Haskell",
        "Scala",
    ]

    language_selector = gr.Dropdown(
        label="Programming Language",
        choices=languages,
    )

    source_checkbox = gr.CheckboxGroup(
        ["GitHub", "Hugging Face"],
        value=["GitHub", "Hugging Face"],
        label="Source",
    )

    df = gr.DataFrame(
        value=data, type="pandas", datatype="markdown", height=1000
    )

    # Any change to one of the three controls re-runs the same search with
    # the current state of all of them.
    search_inputs = [language_selector, query, source_checkbox]
    for component in (query, language_selector, source_checkbox):
        component.change(
            fn=show_search_results,
            inputs=search_inputs,
            outputs=df,
        )

demo.launch()