Spaces:
Sleeping
Sleeping
File size: 1,557 Bytes
d82542d 45d6b11 0d70765 2014880 d82542d 2014880 45d6b11 2014880 0d70765 45d6b11 39e199d 45d6b11 2014880 9a26bab 45d6b11 2014880 d69bc63 45d6b11 2014880 45d6b11 d82542d 45d6b11 2014880 9a26bab d82542d 9a26bab 1700eab d82542d 9a26bab d82542d 9a26bab d82542d 2014880 d82542d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
import argparse
import logging
import sentence_transformers
import datasets
import gradio as gr
# Turn off every log record at CRITICAL severity and below for this process.
logging.disable(logging.CRITICAL)

# Sentence-embedding model used to encode search queries; pinned to the CPU.
_MODEL_NAME = "dangvantuan/sentence-camembert-large"
model = sentence_transformers.SentenceTransformer(_MODEL_NAME, device="cpu")

# Load the local JSON file as the "train" split, then attach the FAISS index
# stored in "index.faiss" under the index name "embeddings".
dataset = datasets.load_dataset(
    "json",
    data_files=["./dataset.json"],
    split="train",
)
dataset.load_faiss_index("embeddings", "index.faiss")
def search(query, k):
    """Return the ``k`` dataset entries nearest to ``query``.

    The query is encoded with the module-level ``model`` and looked up in the
    "embeddings" index of the module-level ``dataset``.

    Args:
        query: Text to search for; passed unchanged to ``model.encode``.
        k: Number of neighbours to retrieve. It is converted with ``int()``
            before the lookup, so a value such as ``3.0`` is accepted.

    Returns:
        A list with one dict per retrieved example, in the order the lookup
        returned them. Each dict has the keys ``"title"``, ``"transcript"``
        (the text prefixed with ``[start ====> end]``) and ``"link"``.
    """
    query_embedding = model.encode(query)
    # Only the retrieved examples are needed; the first element of the
    # returned pair is discarded.
    _, retrieved_examples = dataset.get_nearest_examples(
        "embeddings",
        query_embedding,
        k=int(k),
    )
    # Walk the retrieved columns in lockstep so each iteration sees one example.
    rows = zip(
        retrieved_examples["text"],
        retrieved_examples["start"],
        retrieved_examples["end"],
        retrieved_examples["title"],
        retrieved_examples["url"],
    )
    return [
        {
            "title": title,
            "transcript": f"[{start} ====> {end}] {text}",
            "link": url,
        }
        for text, start, end, title, url in rows
    ]
# Input widgets: the free-text query and the number of results to request.
_query_input = gr.inputs.Textbox(label="Query")
_k_input = gr.inputs.Number(label="K", default=3)

# Three fixed text boxes receive the values returned by `search`.
# NOTE(review): `search` returns as many items as the index yields for the
# requested K, while exactly three outputs are declared here; a K other than
# 3 presumably produces a mismatch -- confirm against the Gradio version used.
_result_outputs = [
    gr.outputs.Textbox(label=label)
    for label in ("Result 1", "Result 2", "Result 3")
]

iface = gr.Interface(
    search,
    inputs=[_query_input, _k_input],
    outputs=_result_outputs,
    title="Camembert and Faiss-powered Search Engine",
    description="Search through a dataset using Camembert and Faiss",
    theme="light",
    layout="vertical",
)

# Start serving the interface as soon as the module is executed.
iface.launch()
|