KevinHuSh
committed on
Commit
·
3a31a8a
1
Parent(s):
60101af
refine citation (#161)
Browse files
- api/apps/conversation_app.py +4 -3
- rag/app/paper.py +1 -1
- rag/nlp/search.py +16 -13
api/apps/conversation_app.py
CHANGED
@@ -194,7 +194,8 @@ def chat(dialog, messages, **kwargs):
|
|
194 |
# try to use sql if field mapping is good to go
|
195 |
if field_map:
|
196 |
chat_logger.info("Use SQL to retrieval:{}".format(questions[-1]))
|
197 |
-
|
|
|
198 |
|
199 |
prompt_config = dialog.prompt_config
|
200 |
for p in prompt_config["parameters"]:
|
@@ -305,7 +306,7 @@ def use_sql(question, field_map, tenant_id, chat_mdl):
|
|
305 |
|
306 |
tbl, sql = get_table()
|
307 |
if tbl is None:
|
308 |
-
return None
|
309 |
if tbl.get("error") and tried_times <= 2:
|
310 |
user_promt = """
|
311 |
表名:{};
|
@@ -333,7 +334,7 @@ def use_sql(question, field_map, tenant_id, chat_mdl):
|
|
333 |
chat_logger.info("GET table: {}".format(tbl))
|
334 |
print(tbl)
|
335 |
if tbl.get("error") or len(tbl["rows"]) == 0:
|
336 |
-
return None
|
337 |
|
338 |
docid_idx = set([ii for ii, c in enumerate(
|
339 |
tbl["columns"]) if c["name"] == "doc_id"])
|
|
|
194 |
# try to use sql if field mapping is good to go
|
195 |
if field_map:
|
196 |
chat_logger.info("Use SQL to retrieval:{}".format(questions[-1]))
|
197 |
+
ans = use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl)
|
198 |
+
if ans: return ans
|
199 |
|
200 |
prompt_config = dialog.prompt_config
|
201 |
for p in prompt_config["parameters"]:
|
|
|
306 |
|
307 |
tbl, sql = get_table()
|
308 |
if tbl is None:
|
309 |
+
return None
|
310 |
if tbl.get("error") and tried_times <= 2:
|
311 |
user_promt = """
|
312 |
表名:{};
|
|
|
334 |
chat_logger.info("GET table: {}".format(tbl))
|
335 |
print(tbl)
|
336 |
if tbl.get("error") or len(tbl["rows"]) == 0:
|
337 |
+
return None
|
338 |
|
339 |
docid_idx = set([ii for ii, c in enumerate(
|
340 |
tbl["columns"]) if c["name"] == "doc_id"])
|
rag/app/paper.py
CHANGED
@@ -120,7 +120,7 @@ class Pdf(PdfParser):
|
|
120 |
print(tbls)
|
121 |
|
122 |
return {
|
123 |
-
"title": title
|
124 |
"authors": " ".join(authors),
|
125 |
"abstract": abstr,
|
126 |
"sections": [(b["text"] + self._line_tag(b, zoomin), b.get("layoutno", "")) for b in self.boxes[i:] if
|
|
|
120 |
print(tbls)
|
121 |
|
122 |
return {
|
123 |
+
"title": title,
|
124 |
"authors": " ".join(authors),
|
125 |
"abstract": abstr,
|
126 |
"sections": [(b["text"] + self._line_tag(b, zoomin), b.get("layoutno", "")) for b in self.boxes[i:] if
|
rag/nlp/search.py
CHANGED
@@ -246,19 +246,22 @@ class Dealer:
|
|
246 |
chunks_tks = [huqie.qie(self.qryr.rmWWW(ck)).split(" ")
|
247 |
for ck in chunks]
|
248 |
cites = {}
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
|
|
|
|
|
|
262 |
|
263 |
res = ""
|
264 |
seted = set([])
|
|
|
246 |
chunks_tks = [huqie.qie(self.qryr.rmWWW(ck)).split(" ")
|
247 |
for ck in chunks]
|
248 |
cites = {}
|
249 |
+
thr = 0.63
|
250 |
+
while len(cites.keys()) == 0 and pieces_ and chunks_tks:
|
251 |
+
for i, a in enumerate(pieces_):
|
252 |
+
sim, tksim, vtsim = self.qryr.hybrid_similarity(ans_v[i],
|
253 |
+
chunk_v,
|
254 |
+
huqie.qie(
|
255 |
+
self.qryr.rmWWW(pieces_[i])).split(" "),
|
256 |
+
chunks_tks,
|
257 |
+
tkweight, vtweight)
|
258 |
+
mx = np.max(sim) * 0.99
|
259 |
+
es_logger.info("{} SIM: {}".format(pieces_[i], mx))
|
260 |
+
if mx < thr:
|
261 |
+
continue
|
262 |
+
cites[idx[i]] = list(
|
263 |
+
set([str(ii) for ii in range(len(chunk_v)) if sim[ii] > mx]))[:4]
|
264 |
+
thr *= 0.8
|
265 |
|
266 |
res = ""
|
267 |
seted = set([])
|