File size: 1,254 Bytes
c5b16f9
 
1864742
 
c5b16f9
 
1864742
 
94349ba
 
1864742
 
94349ba
1864742
 
 
 
 
94349ba
1864742
 
 
94349ba
1864742
 
94349ba
1864742
 
94349ba
 
1864742
 
 
 
 
 
 
94349ba
1864742
 
 
 
94349ba
1864742
94349ba
1864742
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import json

import gradio as gr
import pandas as pd
from pyserini.search.lucene import LuceneSearcher

# Module-level searcher shared by every request. It opens the Lucene index
# stored under "indexes/docs" (a path relative to the working directory).
searcher = LuceneSearcher("indexes/docs")
# NOTE(review): presumably selects the Japanese analyzer so that queries are
# tokenized the same way as the indexed documents -- confirm against the
# Pyserini documentation.
searcher.set_language("ja")


def show_search_results(query: str) -> pd.DataFrame:
    """Search the index for ``query`` and return the hits as a table.

    Args:
        query: Free-text search string typed by the user. An empty,
            whitespace-only, or ``None`` value yields an empty table
            without touching the index.

    Returns:
        A DataFrame with two columns: ``project_name`` (a Markdown link of
        the form ``[name](url)``) and ``description``. At most 100 rows are
        returned, in the order produced by the searcher.

    Raises:
        KeyError: If a stored document lacks the ``url``, ``description``
            or ``project_name`` field.
    """
    project_links = []
    descriptions = []

    # This callback is bound to the textbox's change event, so it also runs
    # when the box is cleared; skip the index lookup for blank input.
    if not query or not query.strip():
        return pd.DataFrame(
            {"project_name": project_links, "description": descriptions}
        )

    for hit in searcher.search(query, k=100):
        # Each stored document's raw payload is a JSON object holding the
        # fields read below.
        record = json.loads(searcher.doc(hit.docid).raw())

        url = record["url"]
        description = record["description"]
        project_name = record["project_name"]

        project_links.append(f"[{project_name}]({url})")
        descriptions.append(description)

    return pd.DataFrame(
        {"project_name": project_links, "description": descriptions}
    )


# Declare the UI: a heading, a search box, and a results table. Components are
# created inside the Blocks context in the order they appear here.
with gr.Blocks() as demo:
    # Page title and a short description with a link to the source list.
    gr.Markdown(
        """
    # Awesome Japanese NLP resources search 🔎
    You can search for open-source software from [500+ Japanese NLP repositories](https://github.com/taishi-i/awesome-japanese-nlp-resources).
    """
    )

    # Free-text query box; "llm" is shown as placeholder text.
    query = gr.Textbox(
        label="Search English or Japanese words", placeholder="llm"
    )
    # Results table. The markdown datatype is requested because
    # show_search_results emits "[name](url)" link strings in project_name.
    df = gr.DataFrame(type="pandas", datatype="markdown", height=1000)

    # Re-run the search whenever the textbox's change event fires and send
    # the returned DataFrame to the results table.
    query.change(fn=show_search_results, inputs=query, outputs=df)

# Start the app at import/run time (there is no __main__ guard).
demo.launch()