arjunanand13 committed
Commit 3943635
1 Parent(s): 17d7c3a

Update app.py

Files changed (1)
  1. app.py +12 -1
app.py CHANGED
@@ -59,6 +59,16 @@ class DocumentRetrievalAndGeneration:
         )
         return generate_text
 
+    def generate_response_with_timeout(self, model_inputs):
+        try:
+            # Run generation in a single-worker pool so it can be aborted on timeout
+            with multiprocessing.Pool(processes=1) as pool:
+                result = pool.apply_async(self.llm.model.generate, (model_inputs,), {"max_new_tokens": 1000, "do_sample": True})
+                generated_ids = result.get(timeout=80)  # Timeout set to 80 seconds
+            return generated_ids
+        except multiprocessing.TimeoutError:
+            raise TimeoutError("Text generation process timed out")
+
     def query_and_generate_response(self, query):
         query_embedding = self.embeddings.encode(query, convert_to_tensor=True).cpu().numpy()
         distances, indices = self.gpu_index.search(np.array([query_embedding]), k=5)
@@ -99,7 +109,8 @@ class DocumentRetrievalAndGeneration:
 
         # Perform inference and measure time
         start_time = datetime.now()
-        generated_ids = self.llm.model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
+        generated_ids = self.generate_response_with_timeout(model_inputs)
+        # generated_ids = self.llm.model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
         elapsed_time = datetime.now() - start_time
 
         # Decode and return output
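
For context, the new generate_response_with_timeout runs the generation call in a one-process multiprocessing.Pool and aborts it via result.get(timeout=...). Below is a minimal, self-contained sketch of that same pattern; slow_task and run_with_timeout are illustrative names, not part of app.py, and the real method assumes multiprocessing is imported elsewhere in the file.

import multiprocessing
import time


def slow_task(seconds):
    # Stand-in for a long-running call such as model.generate (hypothetical example)
    time.sleep(seconds)
    return f"finished after {seconds}s"


def run_with_timeout(seconds, timeout):
    try:
        with multiprocessing.Pool(processes=1) as pool:
            result = pool.apply_async(slow_task, (seconds,))
            # get() raises multiprocessing.TimeoutError if the worker has not finished in time
            return result.get(timeout=timeout)
    except multiprocessing.TimeoutError:
        raise TimeoutError("Task timed out")


if __name__ == "__main__":
    print(run_with_timeout(1, timeout=5))   # completes normally
    # run_with_timeout(10, timeout=2)       # would raise TimeoutError

Because the worker is a separate process, leaving the with block on timeout terminates the pool and actually frees the stalled call, which a thread-based timeout cannot do; the trade-off is that the callable and its arguments must be picklable.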