Abdul-Ib committed on
Commit
6097e30
1 Parent(s): 33cb56c

Update app.py

Files changed (1)
  1. app.py +5 -59
app.py CHANGED
@@ -1,51 +1,11 @@
 import gradio as gr
 import torch
- import asyncio
 from helper_functions import *
 from rank_bm25 import BM25L
- import nest_asyncio
 import time
- nest_asyncio.apply()
- from aiogoogletrans import Translator
 import pprint
-
- # Initialize the translator
- translator = Translator()
-
- async def translate_bulk(bulk: list) -> list:
-     """
-     Translate the given text to English and return the translated text.
-
-     Args:
-     - text (str): The text to translate.
-
-     Returns:
-     - str: The translated text.
-     """
-     try:
-         translated_bulk = await translator.translate(bulk, dest="en")
-         translated_bulk = [
-             translated_text.text.lower().strip() for translated_text in translated_bulk
-         ]
-     except Exception as e:
-         print(f"Bulk Translation failed: {e}")
-         translated_bulk = [
-             text.lower().strip() for text in bulk
-         ]  # Use original text if translation fails
-     return translated_bulk
-
- async def encode_document(document: str):
-     """_summary_
-
-     Args:
-         document (str): _description_
-
-     Returns:
-         _type_: _description_
-     """
-     return semantic_model.encode(document, convert_to_tensor=True)
 
- async def predict(query):
+ def predict(query):
     start_time = time.time()
     normalized_query_list = (
         [normalizer.clean_text(query)]
@@ -102,14 +62,8 @@ async def predict(query):
         categorize_time = categorize_end_time - categorize_start_time
     except Exception as e:
         return {"error": f"An error occurred while categorizing products: {e}"}
-
-     try:
-         translation_start_time = time.time()
-         representation_list = await translate_bulk(tasks)
-     except Exception as e:
-         representation_list = tasks
-         print(f"An error occurred while translating: {e}")
-     translation_time = time.time() - translation_start_time
+
+     representation_list = tasks
 
     try:
         # Tokenize representations for keyword search
@@ -123,16 +77,9 @@ async def predict(query):
 
     # Encode representations for semantic search
     encode_start_time = time.time()
-     try:
-         embeddings = await asyncio.gather(
-             *[encode_document(document) for document in representation_list]
-         )
-         doc_embeddings = torch.stack(embeddings)
-     except Exception as e:
-         doc_embeddings = semantic_model.encode(
+     doc_embeddings = semantic_model.encode(
         representation_list, convert_to_tensor=True
     )
-         print(f"An error occurred while encoding documents: {e}")
     encode_end_time = time.time()
     encode_time = encode_end_time - encode_start_time
 
@@ -159,8 +106,7 @@ async def predict(query):
 
     hits = {"results": results, "time_taken": time_taken, "normalize_query_time": normalize_query_time,
             "request_time": request_time, "normalization_time": normalization_time,
-             "translation_time": translation_time, "categorize_time": categorize_time,
-             "tokenization_time": tokenization_time, "encode_time": encode_time,
+             "categorize_time": categorize_time, "tokenization_time": tokenization_time, "encode_time": encode_time,
             "calculate_interrelations_time": calculate_interrelations_time,
             "process_time": process_time_taken}
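
Note on the change: with translation removed, predict() now uses the raw tasks list directly as representation_list, and the per-document asyncio encoding path is replaced by one batched semantic_model.encode() call. The sketch below shows the slimmed-down keyword and semantic scoring steps in isolation; it assumes semantic_model is a sentence-transformers SentenceTransformer and uses a plain whitespace tokenizer, since helper_functions is not part of this commit (both are assumptions, not the app's actual setup).

# Minimal, self-contained sketch of the post-commit retrieval steps (assumed setup).
from rank_bm25 import BM25L
from sentence_transformers import SentenceTransformer, util

semantic_model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed model; helper_functions defines the real one

representation_list = ["red cotton t-shirt", "wireless bluetooth headphones"]  # stand-in for `tasks`
query = "bluetooth headset"

# Keyword search: BM25L over whitespace-tokenized representations
tokenized_corpus = [doc.lower().split() for doc in representation_list]
bm25 = BM25L(tokenized_corpus)
keyword_scores = bm25.get_scores(query.lower().split())

# Semantic search: one batched encode replaces the old asyncio.gather + torch.stack loop
doc_embeddings = semantic_model.encode(representation_list, convert_to_tensor=True)  # shape (n_docs, dim)
query_embedding = semantic_model.encode(query, convert_to_tensor=True)
semantic_scores = util.cos_sim(query_embedding, doc_embeddings)[0]

print(keyword_scores, semantic_scores)

Batching the whole list into a single encode() call yields the same (n_docs, dim) tensor that the removed asyncio.gather and torch.stack path assembled one document at a time, and it drops the nest_asyncio and aiogoogletrans dependencies from the Gradio app.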