KevinHuSh committed on
Commit
3a31a8a
·
1 Parent(s): 60101af

refine citation (#161)

Browse files
api/apps/conversation_app.py CHANGED
@@ -194,7 +194,8 @@ def chat(dialog, messages, **kwargs):
194
  # try to use sql if field mapping is good to go
195
  if field_map:
196
  chat_logger.info("Use SQL to retrieval:{}".format(questions[-1]))
197
- return use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl)
 
198
 
199
  prompt_config = dialog.prompt_config
200
  for p in prompt_config["parameters"]:
@@ -305,7 +306,7 @@ def use_sql(question, field_map, tenant_id, chat_mdl):
305
 
306
  tbl, sql = get_table()
307
  if tbl is None:
308
- return None, None
309
  if tbl.get("error") and tried_times <= 2:
310
  user_promt = """
311
  表名:{};
@@ -333,7 +334,7 @@ def use_sql(question, field_map, tenant_id, chat_mdl):
333
  chat_logger.info("GET table: {}".format(tbl))
334
  print(tbl)
335
  if tbl.get("error") or len(tbl["rows"]) == 0:
336
- return None, None
337
 
338
  docid_idx = set([ii for ii, c in enumerate(
339
  tbl["columns"]) if c["name"] == "doc_id"])
 
194
  # try to use sql if field mapping is good to go
195
  if field_map:
196
  chat_logger.info("Use SQL to retrieval:{}".format(questions[-1]))
197
+ ans = use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl)
198
+ if ans: return ans
199
 
200
  prompt_config = dialog.prompt_config
201
  for p in prompt_config["parameters"]:
 
306
 
307
  tbl, sql = get_table()
308
  if tbl is None:
309
+ return None
310
  if tbl.get("error") and tried_times <= 2:
311
  user_promt = """
312
  表名:{};
 
334
  chat_logger.info("GET table: {}".format(tbl))
335
  print(tbl)
336
  if tbl.get("error") or len(tbl["rows"]) == 0:
337
+ return None
338
 
339
  docid_idx = set([ii for ii, c in enumerate(
340
  tbl["columns"]) if c["name"] == "doc_id"])
rag/app/paper.py CHANGED
@@ -120,7 +120,7 @@ class Pdf(PdfParser):
120
  print(tbls)
121
 
122
  return {
123
- "title": title if title else filename,
124
  "authors": " ".join(authors),
125
  "abstract": abstr,
126
  "sections": [(b["text"] + self._line_tag(b, zoomin), b.get("layoutno", "")) for b in self.boxes[i:] if
 
120
  print(tbls)
121
 
122
  return {
123
+ "title": title,
124
  "authors": " ".join(authors),
125
  "abstract": abstr,
126
  "sections": [(b["text"] + self._line_tag(b, zoomin), b.get("layoutno", "")) for b in self.boxes[i:] if
rag/nlp/search.py CHANGED
@@ -246,19 +246,22 @@ class Dealer:
246
  chunks_tks = [huqie.qie(self.qryr.rmWWW(ck)).split(" ")
247
  for ck in chunks]
248
  cites = {}
249
- for i, a in enumerate(pieces_):
250
- sim, tksim, vtsim = self.qryr.hybrid_similarity(ans_v[i],
251
- chunk_v,
252
- huqie.qie(
253
- self.qryr.rmWWW(pieces_[i])).split(" "),
254
- chunks_tks,
255
- tkweight, vtweight)
256
- mx = np.max(sim) * 0.99
257
- es_logger.info("{} SIM: {}".format(pieces_[i], mx))
258
- if mx < 0.63:
259
- continue
260
- cites[idx[i]] = list(
261
- set([str(ii) for ii in range(len(chunk_v)) if sim[ii] > mx]))[:4]
 
 
 
262
 
263
  res = ""
264
  seted = set([])
 
246
  chunks_tks = [huqie.qie(self.qryr.rmWWW(ck)).split(" ")
247
  for ck in chunks]
248
  cites = {}
249
+ thr = 0.63
250
+ while len(cites.keys()) == 0 and pieces_ and chunks_tks:
251
+ for i, a in enumerate(pieces_):
252
+ sim, tksim, vtsim = self.qryr.hybrid_similarity(ans_v[i],
253
+ chunk_v,
254
+ huqie.qie(
255
+ self.qryr.rmWWW(pieces_[i])).split(" "),
256
+ chunks_tks,
257
+ tkweight, vtweight)
258
+ mx = np.max(sim) * 0.99
259
+ es_logger.info("{} SIM: {}".format(pieces_[i], mx))
260
+ if mx < thr:
261
+ continue
262
+ cites[idx[i]] = list(
263
+ set([str(ii) for ii in range(len(chunk_v)) if sim[ii] > mx]))[:4]
264
+ thr *= 0.8
265
 
266
  res = ""
267
  seted = set([])