Commit 403ecb1 by jinhai-2012 · Parent(s): 5e4c165

Fix 'SCORE' not found bug (#4178)

### What problem does this PR solve?

As title: sorting Infinity search results by `pl.col("SCORE")` fails because the score column returned by Infinity is named `score()`, which produces a 'SCORE' not found error.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Signed-off-by: jinhai <haijin.chn@gmail.com>

api/db/services/dialog_service.py CHANGED

@@ -311,7 +311,7 @@ def chat(dialog, messages, stream=True, **kwargs):
         retrieval_time_cost = (retrieval_ts - generate_keyword_ts) * 1000
         generate_result_time_cost = (finish_chat_ts - retrieval_ts) * 1000
 
-        prompt = f"{prompt} ### Elapsed\n - Total: {total_time_cost:.1f}ms\n - Check LLM: {check_llm_time_cost:.1f}ms\n - Create retriever: {create_retriever_time_cost:.1f}ms\n - Bind embedding: {bind_embedding_time_cost:.1f}ms\n - Bind LLM: {bind_llm_time_cost:.1f}ms\n - Tune question: {refine_question_time_cost:.1f}ms\n - Bind reranker: {bind_reranker_time_cost:.1f}ms\n - Generate keyword: {generate_keyword_time_cost:.1f}ms\n - Retrieval: {retrieval_time_cost:.1f}ms\n - Generate answer: {generate_result_time_cost:.1f}ms"
+        prompt = f"{prompt}\n\n - Total: {total_time_cost:.1f}ms\n - Check LLM: {check_llm_time_cost:.1f}ms\n - Create retriever: {create_retriever_time_cost:.1f}ms\n - Bind embedding: {bind_embedding_time_cost:.1f}ms\n - Bind LLM: {bind_llm_time_cost:.1f}ms\n - Tune question: {refine_question_time_cost:.1f}ms\n - Bind reranker: {bind_reranker_time_cost:.1f}ms\n - Generate keyword: {generate_keyword_time_cost:.1f}ms\n - Retrieval: {retrieval_time_cost:.1f}ms\n - Generate answer: {generate_result_time_cost:.1f}ms"
         return {"answer": answer, "reference": refs, "prompt": prompt}
 
     if stream:
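For readers skimming the hunk: only the f-string prefix changes. The literal `### Elapsed` heading is dropped in favor of a blank line, and the per-stage timing list is kept verbatim. The breakdown itself is plain timestamp arithmetic; here is a minimal, self-contained sketch of the pattern, with hypothetical stage boundaries standing in for the real pipeline steps in `chat()`:

```python
# Sketch of the elapsed-time pattern used in chat(); the stage markers
# below are hypothetical stand-ins for the real pipeline steps.
from time import perf_counter

chat_start_ts = perf_counter()
# ... check LLM, bind embedding/LLM/reranker, refine question, retrieve ...
retrieval_ts = perf_counter()
# ... generate the final answer ...
finish_chat_ts = perf_counter()

total_time_cost = (finish_chat_ts - chat_start_ts) * 1000           # s -> ms
generate_result_time_cost = (finish_chat_ts - retrieval_ts) * 1000  # s -> ms

prompt = "prompt text assembled earlier in chat()"
prompt = f"{prompt}\n\n - Total: {total_time_cost:.1f}ms\n - Generate answer: {generate_result_time_cost:.1f}ms"
print(prompt)
```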
api/db/services/task_service.py CHANGED

@@ -204,7 +204,7 @@ def queue_tasks(doc: dict, bucket: str, name: str):
     def new_task():
         return {"id": get_uuid(), "doc_id": doc["id"], "progress": 0.0, "from_page": 0, "to_page": 100000000}
 
-    tsks = []
+    parse_task_array = []
 
     if doc["type"] == FileType.PDF.value:
         file_bin = STORAGE_IMPL.get(bucket, name)
@@ -224,7 +224,7 @@ def queue_tasks(doc: dict, bucket: str, name: str):
             task = new_task()
             task["from_page"] = p
             task["to_page"] = min(p + page_size, e)
-            tsks.append(task)
+            parse_task_array.append(task)
 
     elif doc["parser_id"] == "table":
         file_bin = STORAGE_IMPL.get(bucket, name)
@@ -233,12 +233,12 @@ def queue_tasks(doc: dict, bucket: str, name: str):
             task = new_task()
             task["from_page"] = i
             task["to_page"] = min(i + 3000, rn)
-            tsks.append(task)
+            parse_task_array.append(task)
     else:
-        tsks.append(new_task())
+        parse_task_array.append(new_task())
 
     chunking_config = DocumentService.get_chunking_config(doc["id"])
-    for task in tsks:
+    for task in parse_task_array:
         hasher = xxhash.xxh64()
         for field in sorted(chunking_config.keys()):
             hasher.update(str(chunking_config[field]).encode("utf-8"))
@@ -251,7 +251,7 @@ def queue_tasks(doc: dict, bucket: str, name: str):
     prev_tasks = TaskService.get_tasks(doc["id"])
     ck_num = 0
     if prev_tasks:
-        for task in tsks:
+        for task in parse_task_array:
             ck_num += reuse_prev_task_chunks(task, prev_tasks, chunking_config)
     TaskService.filter_delete([Task.doc_id == doc["id"]])
     chunk_ids = []
@@ -263,13 +263,13 @@ def queue_tasks(doc: dict, bucket: str, name: str):
                              chunking_config["kb_id"])
     DocumentService.update_by_id(doc["id"], {"chunk_num": ck_num})
 
-    bulk_insert_into_db(Task, tsks, True)
+    bulk_insert_into_db(Task, parse_task_array, True)
     DocumentService.begin2parse(doc["id"])
 
-    tsks = [task for task in tsks if task["progress"] < 1.0]
-    for t in tsks:
+    unfinished_task_array = [task for task in parse_task_array if task["progress"] < 1.0]
+    for unfinished_task in unfinished_task_array:
         assert REDIS_CONN.queue_product(
-            SVR_QUEUE_NAME, message=t
+            SVR_QUEUE_NAME, message=unfinished_task
         ), "Can't access Redis. Please check the Redis' status."
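The task_service.py hunks are a pure readability rename: `tsks` becomes `parse_task_array`, and the requeue loop gains `unfinished_task_array`/`unfinished_task`. Behavior is unchanged: page-window splitting, the xxhash digest over the sorted chunking config, chunk reuse from previous tasks, and the Redis enqueue of unfinished tasks all work as before. A self-contained sketch of the splitting-and-requeue pattern; `split_into_page_tasks` and the 12-page window are illustrative stand-ins, not RAGFlow API:

```python
def new_task(doc_id: str) -> dict:
    # Mirrors the task dict shape built by new_task() in queue_tasks().
    return {"doc_id": doc_id, "progress": 0.0, "from_page": 0, "to_page": 100000000}

def split_into_page_tasks(doc_id: str, pages: int, page_size: int = 12) -> list[dict]:
    # One task per page window, like the PDF branch of queue_tasks().
    parse_task_array = []
    for p in range(0, pages, page_size):
        task = new_task(doc_id)
        task["from_page"] = p
        task["to_page"] = min(p + page_size, pages)
        parse_task_array.append(task)
    return parse_task_array

parse_task_array = split_into_page_tasks("doc-1", pages=30)
# Only tasks still below 100% progress get queued (mirrors the Redis loop).
unfinished_task_array = [t for t in parse_task_array if t["progress"] < 1.0]
for unfinished_task in unfinished_task_array:
    print(unfinished_task["from_page"], unfinished_task["to_page"])  # 0 12 / 12 24 / 24 30
```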
 
rag/utils/infinity_conn.py CHANGED

@@ -342,7 +342,7 @@ class InfinityConnection(DocStoreConnection):
         self.connPool.release_conn(inf_conn)
         res = concat_dataframes(df_list, selectFields)
         if matchExprs:
-            res = res.sort(pl.col("SCORE") + pl.col("pagerank_fea"), descending=True, maintain_order=True)
+            res = res.sort(pl.col("score()") + pl.col("pagerank_fea"), descending=True, maintain_order=True)
         res = res.limit(limit)
         logger.debug(f"INFINITY search final result: {str(res)}")
         return res, total_hits_count
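This last hunk is the fix the title refers to: the polars DataFrame assembled from Infinity's results exposes the relevance score under the column name `score()`, not `SCORE`, so the old sort expression raised a column-not-found error whenever match expressions were present. A self-contained polars demo of the corrected sort (the sample rows are made up):

```python
import polars as pl

# Toy stand-in for concat_dataframes(df_list, selectFields); the column
# name "score()" matches what Infinity returns, the values are invented.
res = pl.DataFrame({
    "id": ["a", "b", "c"],
    "score()": [0.42, 0.91, 0.60],
    "pagerank_fea": [0.10, 0.00, 0.30],
})
# Sort by combined relevance: score() + pagerank, highest first.
res = res.sort(pl.col("score()") + pl.col("pagerank_fea"), descending=True, maintain_order=True)
print(res)  # order: b (0.91), c (0.90), a (0.52)
```

`maintain_order=True` keeps tied rows in their original order, which matters when several hits share the same combined score.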