madoss committed on
Commit
45d6b11
1 Parent(s): 19b90a7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -19
app.py CHANGED
@@ -1,24 +1,26 @@
 
1
  import gradio as gr
2
- import os
3
- import query_index
4
  import datasets
5
  import sentence_transformers
6
 
7
- def query(text, k=5):
8
- model = sentence_transformers.SentenceTransformer(
9
- "dangvantuan/sentence-camembert-large", device="cpu")
10
 
11
- dataset = datasets.load_dataset("json", data_files=["./data/dataset.json"], split="train")
12
- dataset.load_faiss_index("embeddings", "index.faiss")
 
 
 
 
13
 
14
- query_embedding = model.encode(text)
 
15
  _, retrieved_examples = dataset.get_nearest_examples(
16
  "embeddings",
17
  query_embedding,
18
  k=k,
19
- )
20
-
21
 
 
22
  for text, start, end, title, url in zip(
23
  retrieved_examples["text"],
24
  retrieved_examples["start"],
@@ -28,16 +30,25 @@ def query(text, k=5):
28
  ):
29
  start = start
30
  end = end
31
- print(f"title: {title}")
32
- print(f"transcript: [{str(start)+' ====> '+str(end)}] {text}")
33
- print(f"link: {url}")
34
- print("*" * 10)
 
 
 
35
 
36
  iface = gr.Interface(
37
- fn=query,
38
- inputs='text',
39
- outputs='text',
40
- examples=[["Qu'est ce qui t'a fait le plus progresser?"]]
 
 
 
 
 
41
  )
42
 
43
- iface.launch()
 
 
1
+ import logging
2
  import gradio as gr
 
 
3
  import datasets
4
  import sentence_transformers
5
 
6
+ logging.disable(logging.CRITICAL)
 
 
7
 
8
+ model = sentence_transformers.SentenceTransformer(
9
+ "dangvantuan/sentence-camembert-large", device="cuda"
10
+ )
11
+
12
+ dataset = datasets.load_dataset("json", data_files=["./data/dataset.json"], split="train")
13
+ dataset.load_faiss_index("embeddings", "index.faiss")
14
 
15
+ def search(query: str, k: int):
16
+ query_embedding = model.encode(query)
17
  _, retrieved_examples = dataset.get_nearest_examples(
18
  "embeddings",
19
  query_embedding,
20
  k=k,
21
+ )
 
22
 
23
+ results = []
24
  for text, start, end, title, url in zip(
25
  retrieved_examples["text"],
26
  retrieved_examples["start"],
 
30
  ):
31
  start = start
32
  end = end
33
+ result = {
34
+ "title": title,
35
+ "transcript": f"[{str(start)+' ====> '+str(end)}] {text}",
36
+ "link": url
37
+ }
38
+ results.append(result)
39
+ return results
40
 
41
  iface = gr.Interface(
42
+ fn=search,
43
+ inputs=["text", "number"],
44
+ outputs=gr.outputs.JSON(),
45
+ title="Search Dataset",
46
+ description="Search a dataset using Camembert and Faiss.",
47
+ example=[
48
+ "Enter a query to search for.",
49
+ 5
50
+ ]
51
  )
52
 
53
+ if __name__ == "__main__":
54
+ iface.launch()