madoss committed on
Commit
d82542d
1 Parent(s): c4ddf9f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -21
app.py CHANGED
@@ -1,25 +1,24 @@
 
1
  import logging
2
- import gradio as gr
3
  import datasets
4
- import sentence_transformers
5
 
6
  logging.disable(logging.CRITICAL)
7
 
8
- model = sentence_transformers.SentenceTransformer(
9
- "dangvantuan/sentence-camembert-large", device="cuda"
10
- )
11
 
12
- dataset = datasets.load_dataset("json", data_files=["./dataset.json"], split="train")
13
  dataset.load_faiss_index("embeddings", "index.faiss")
14
 
15
- def search(query: str, k: int):
16
  query_embedding = model.encode(query)
17
  _, retrieved_examples = dataset.get_nearest_examples(
18
  "embeddings",
19
  query_embedding,
20
  k=k,
21
  )
22
-
23
  results = []
24
  for text, start, end, title, url in zip(
25
  retrieved_examples["text"],
@@ -32,23 +31,27 @@ def search(query: str, k: int):
32
  end = end
33
  result = {
34
  "title": title,
35
- "transcript": f"[{str(start)+' ====> '+str(end)}] {text}",
36
- "link": url
37
  }
38
  results.append(result)
39
  return results
40
 
41
  iface = gr.Interface(
42
- fn=search,
43
- inputs=["text", "number"],
44
- outputs=gr.outputs.JSON(),
45
- title="Search Dataset",
46
- description="Search a dataset using Camembert and Faiss.",
47
- example=[
48
- "Enter a query to search for.",
49
- 5
50
- ]
 
 
 
 
 
51
  )
52
 
53
- if __name__ == "__main__":
54
- iface.launch()
 
1
+ import argparse
2
  import logging
3
+
4
  import datasets
5
+ import gradio as gr
6
 
7
  logging.disable(logging.CRITICAL)
8
 
9
+ model_name = "dangvantuan/sentence-camembert-large"
10
+ model = gr.load(model_name)
 
11
 
12
+ dataset = datasets.load_dataset("json", data_files=["./data/dataset.json"], split="train")
13
  dataset.load_faiss_index("embeddings", "index.faiss")
14
 
15
+ def search(query, k):
16
  query_embedding = model.encode(query)
17
  _, retrieved_examples = dataset.get_nearest_examples(
18
  "embeddings",
19
  query_embedding,
20
  k=k,
21
  )
 
22
  results = []
23
  for text, start, end, title, url in zip(
24
  retrieved_examples["text"],
 
31
  end = end
32
  result = {
33
  "title": title,
34
+ "transcript": f"[{str(start)} ====> {str(end)}] {text}",
35
+ "link": url,
36
  }
37
  results.append(result)
38
  return results
39
 
40
  iface = gr.Interface(
41
+ search,
42
+ inputs=[
43
+ gr.inputs.Textbox(label="Query"),
44
+ gr.inputs.Number(label="K", default=3, min_value=1, max_value=10),
45
+ ],
46
+ outputs=[
47
+ gr.outputs.Textbox(label="Title"),
48
+ gr.outputs.Textbox(label="Transcript"),
49
+ gr.outputs.Textbox(label="Link"),
50
+ ],
51
+ title="Camembert and Faiss-powered Search Engine",
52
+ description="Search through a dataset using Camembert and Faiss",
53
+ theme="light",
54
+ layout="vertical",
55
  )
56
 
57
+ iface.launch()