Commit
·
1427166
1
Parent(s):
bec7e87
Fix chunk creation using Infinity (#3763)
Browse files
### What problem does this PR solve?
1. A value of the wrong type was stored in Infinity.
2. The position list value read from Infinity isn't correct.
Fix issue: #3729
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
Signed-off-by: jinhai <haijin.chn@gmail.com>
- api/apps/chunk_app.py +5 -0
- rag/utils/infinity_conn.py +4 -1
api/apps/chunk_app.py
CHANGED
@@ -60,6 +60,11 @@ def list_chunk():
|
|
60 |
sres = settings.retrievaler.search(query, search.index_name(tenant_id), kb_ids, highlight=True)
|
61 |
res = {"total": sres.total, "chunks": [], "doc": doc.to_dict()}
|
62 |
for id in sres.ids:
|
|
|
|
|
|
|
|
|
|
|
63 |
d = {
|
64 |
"chunk_id": id,
|
65 |
"content_with_weight": rmSpace(sres.highlight[id]) if question and id in sres.highlight else sres.field[
|
|
|
60 |
sres = settings.retrievaler.search(query, search.index_name(tenant_id), kb_ids, highlight=True)
|
61 |
res = {"total": sres.total, "chunks": [], "doc": doc.to_dict()}
|
62 |
for id in sres.ids:
|
63 |
+
chunk_elem = sres.field[id]
|
64 |
+
if 'position_list' in chunk_elem:
|
65 |
+
if isinstance(chunk_elem["position_list"], str):
|
66 |
+
chunk_elem.pop('position_list') # Infinity will store position list as empty str
|
67 |
+
|
68 |
d = {
|
69 |
"chunk_id": id,
|
70 |
"content_with_weight": rmSpace(sres.highlight[id]) if question and id in sres.highlight else sres.field[
|
rag/utils/infinity_conn.py
CHANGED
@@ -328,7 +328,7 @@ class InfinityConnection(DocStoreConnection):
|
|
328 |
table_instance = db_instance.get_table(table_name)
|
329 |
except InfinityException as e:
|
330 |
# src/common/status.cppm, kTableNotExist = 3022
|
331 |
-
if e.error_code !=
|
332 |
raise
|
333 |
vector_size = 0
|
334 |
patt = re.compile(r"q_(?P<vector_size>\d+)_vec")
|
@@ -348,6 +348,9 @@ class InfinityConnection(DocStoreConnection):
|
|
348 |
for k, v in d.items():
|
349 |
if k.endswith("_kwd") and isinstance(v, list):
|
350 |
d[k] = " ".join(v)
|
|
|
|
|
|
|
351 |
ids = ["'{}'".format(d["id"]) for d in documents]
|
352 |
str_ids = ", ".join(ids)
|
353 |
str_filter = f"id IN ({str_ids})"
|
|
|
328 |
table_instance = db_instance.get_table(table_name)
|
329 |
except InfinityException as e:
|
330 |
# src/common/status.cppm, kTableNotExist = 3022
|
331 |
+
if e.error_code != ErrorCode.TABLE_NOT_EXIST:
|
332 |
raise
|
333 |
vector_size = 0
|
334 |
patt = re.compile(r"q_(?P<vector_size>\d+)_vec")
|
|
|
348 |
for k, v in d.items():
|
349 |
if k.endswith("_kwd") and isinstance(v, list):
|
350 |
d[k] = " ".join(v)
|
351 |
+
if k == 'kb_id':
|
352 |
+
if isinstance(d[k], list):
|
353 |
+
d[k] = d[k][0] # since d[k] is a list, but we need a str
|
354 |
ids = ["'{}'".format(d["id"]) for d in documents]
|
355 |
str_ids = ", ".join(ids)
|
356 |
str_filter = f"id IN ({str_ids})"
|