# taishi-i's picture
# update app.py
# 1864742
# raw history blame
# No virus
# 1.25 kB
import json
import gradio as gr
import pandas as pd
from pyserini.search.lucene import LuceneSearcher
# Directory of the Lucene index to open and the language code handed to the
# searcher before any query is issued.
_INDEX_DIR = "indexes/docs"
_QUERY_LANGUAGE = "ja"

# Single module-level searcher shared by the search callback.
searcher = LuceneSearcher(_INDEX_DIR)
searcher.set_language(_QUERY_LANGUAGE)
def show_search_results(query: str) -> pd.DataFrame:
    """Search the index and return the hits as a two-column table.

    Args:
        query: Free-text search string entered by the user.

    Returns:
        A DataFrame with a ``project_name`` column holding Markdown links
        of the form ``[name](url)`` and a ``description`` column, one row
        per hit (the search is capped at 100 hits). A blank or
        whitespace-only query yields an empty table with the same columns.
    """
    data = {"project_name": [], "description": []}

    # This callback runs on every textbox change, including when the box is
    # cleared; skip the index lookup when there is nothing to search for.
    if not query or not query.strip():
        return pd.DataFrame(data)

    for result in searcher.search(query, k=100):
        # The raw stored document is a JSON record with ``url``,
        # ``description`` and ``project_name`` fields.
        doc = searcher.doc(result.docid)
        record = json.loads(doc.raw())
        data["project_name"].append(
            f"[{record['project_name']}]({record['url']})"
        )
        data["description"].append(record["description"])

    return pd.DataFrame(data)
# Build the search page: a heading, a query box, and a results table that is
# refreshed whenever the query text changes.
with gr.Blocks() as demo:
    # The Markdown text is kept flush-left inside the literal so that leading
    # spaces do not turn the heading into a code block.
    gr.Markdown(
        """
# Awesome Japanese NLP resources search 🔎
You can search for open-source software from [500+ Japanese NLP repositories](https://github.com/taishi-i/awesome-japanese-nlp-resources).
"""
    )

    query = gr.Textbox(
        label="Search English or Japanese words", placeholder="llm"
    )

    # Project names are emitted as Markdown links, so the table cells are
    # declared with the "markdown" datatype.
    df = gr.DataFrame(type="pandas", datatype="markdown", height=1000)

    # Re-run the search on every edit of the textbox.
    query.change(fn=show_search_results, inputs=query, outputs=df)

demo.launch()