File size: 1,557 Bytes
d82542d
45d6b11
0d70765
2014880
d82542d
2014880
45d6b11
2014880
0d70765
 
 
 
45d6b11
39e199d
45d6b11
2014880
9a26bab
45d6b11
2014880
 
 
d69bc63
45d6b11
 
2014880
 
 
 
 
 
 
 
 
45d6b11
 
d82542d
 
45d6b11
 
 
2014880
 
9a26bab
d82542d
9a26bab
1700eab
d82542d
 
9a26bab
 
 
d82542d
 
 
9a26bab
d82542d
2014880
 
d82542d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import argparse
import logging
import sentence_transformers
import datasets
import gradio as gr

# Turn off every logging call at CRITICAL severity and below for the process.
logging.disable(logging.CRITICAL)

# Sentence encoder used to embed search queries; pinned to the CPU.
model = sentence_transformers.SentenceTransformer(
    "dangvantuan/sentence-camembert-large",
    device="cpu",
)

# Load the records from the local JSON file, then attach the FAISS index
# stored in "index.faiss" under the index name "embeddings".
dataset = datasets.load_dataset(
    "json",
    data_files=["./dataset.json"],
    split="train",
)
dataset.load_faiss_index("embeddings", "index.faiss")

def search(query, k):
    """Return the ``k`` dataset entries nearest to ``query``.

    The query is embedded with the module-level ``model`` and looked up in
    the "embeddings" index attached to the module-level ``dataset``.

    Args:
        query: Text to search for; handed unchanged to ``model.encode``.
        k: Number of neighbours to retrieve. Any value accepted by ``int()``
            works (the UI feeds this from a Number input).

    Returns:
        A list with one dict per retrieved example, in the order produced by
        ``get_nearest_examples``. Each dict has the keys ``"title"``,
        ``"transcript"`` (the text prefixed with ``[start ====> end]``) and
        ``"link"``. The list is empty when ``k`` is smaller than 1.

    Raises:
        TypeError, ValueError: If ``k`` cannot be converted by ``int()``.
    """
    top_k = int(k)
    if top_k < 1:
        # Zero or negative neighbours can never produce a hit, so skip the
        # embedding and the index lookup entirely.
        return []

    query_embedding = model.encode(query)
    # The first element of the returned pair is not needed here.
    _, retrieved_examples = dataset.get_nearest_examples(
        "embeddings",
        query_embedding,
        k=top_k,
    )

    # The retrieved examples are column-oriented; zip walks them row by row.
    rows = zip(
        retrieved_examples["text"],
        retrieved_examples["start"],
        retrieved_examples["end"],
        retrieved_examples["title"],
        retrieved_examples["url"],
    )
    return [
        {
            "title": title,
            "transcript": f"[{start} ====> {end}] {text}",
            "link": url,
        }
        for text, start, end, title, url in rows
    ]

# The UI shows a fixed number of result boxes, while ``search`` returns as
# many items as the user asked for. Gradio expects one value per output
# component, so the two have to be reconciled before handing results back.
_RESULT_BOX_COUNT = 3


def _search_for_ui(query, k):
    """Run ``search`` and fit its results to the fixed set of result boxes.

    Results beyond the number of boxes are dropped and missing ones are
    filled with empty strings, so the returned list always has exactly
    ``_RESULT_BOX_COUNT`` items. With K equal to the number of boxes the
    output is exactly what ``search`` returned.
    """
    shown = search(query, k)[:_RESULT_BOX_COUNT]
    return shown + [""] * (_RESULT_BOX_COUNT - len(shown))


iface = gr.Interface(
    _search_for_ui,
    inputs=[
        gr.inputs.Textbox(label="Query"),
        gr.inputs.Number(label="K", default=3),
    ],
    # One Textbox per result slot, labelled "Result 1" .. "Result N".
    outputs=[
        gr.outputs.Textbox(label=f"Result {position}")
        for position in range(1, _RESULT_BOX_COUNT + 1)
    ],
    title="Camembert and Faiss-powered Search Engine",
    description="Search through a dataset using Camembert and Faiss",
    theme="light",
    layout="vertical",
)

iface.launch()