Spaces:

retopara
/

ragflow

Build error

KevinHuSh committed on Mar 28, 2024

Commit

3a31a8a

1 Parent(s): 60101af

refine citation (#161)

Files changed (3) hide show

api/apps/conversation_app.py CHANGED Viewed

@@ -194,7 +194,8 @@ def chat(dialog, messages, **kwargs):
     # try to use sql if field mapping is good to go
     if field_map:
         chat_logger.info("Use SQL to retrieval:{}".format(questions[-1]))
-        return use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl)
     prompt_config = dialog.prompt_config
     for p in prompt_config["parameters"]:
@@ -305,7 +306,7 @@ def use_sql(question, field_map, tenant_id, chat_mdl):
     tbl, sql = get_table()
     if tbl is None:
-        return None, None
     if tbl.get("error") and tried_times <= 2:
         user_promt = """
         表名：{}；
@@ -333,7 +334,7 @@ def use_sql(question, field_map, tenant_id, chat_mdl):
     chat_logger.info("GET table: {}".format(tbl))
     print(tbl)
     if tbl.get("error") or len(tbl["rows"]) == 0:
-        return None, None
     docid_idx = set([ii for ii, c in enumerate(
         tbl["columns"]) if c["name"] == "doc_id"])

     # try to use sql if field mapping is good to go
     if field_map:
         chat_logger.info("Use SQL to retrieval:{}".format(questions[-1]))
+        ans = use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl)
+        if ans: return ans
     prompt_config = dialog.prompt_config
     for p in prompt_config["parameters"]:
     tbl, sql = get_table()
     if tbl is None:
+        return None
     if tbl.get("error") and tried_times <= 2:
         user_promt = """
         表名：{}；
     chat_logger.info("GET table: {}".format(tbl))
     print(tbl)
     if tbl.get("error") or len(tbl["rows"]) == 0:
+        return None
     docid_idx = set([ii for ii, c in enumerate(
         tbl["columns"]) if c["name"] == "doc_id"])

rag/app/paper.py CHANGED Viewed

@@ -120,7 +120,7 @@ class Pdf(PdfParser):
         print(tbls)
         return {
-            "title": title if title else filename,
             "authors": " ".join(authors),
             "abstract": abstr,
             "sections": [(b["text"] + self._line_tag(b, zoomin), b.get("layoutno", "")) for b in self.boxes[i:] if

         print(tbls)
         return {
+            "title": title,
             "authors": " ".join(authors),
             "abstract": abstr,
             "sections": [(b["text"] + self._line_tag(b, zoomin), b.get("layoutno", "")) for b in self.boxes[i:] if

rag/nlp/search.py CHANGED Viewed

@@ -246,19 +246,22 @@ class Dealer:
         chunks_tks = [huqie.qie(self.qryr.rmWWW(ck)).split(" ")
                       for ck in chunks]
         cites = {}
-        for i, a in enumerate(pieces_):
-            sim, tksim, vtsim = self.qryr.hybrid_similarity(ans_v[i],
-                                                            chunk_v,
-                                                            huqie.qie(
-                                                                self.qryr.rmWWW(pieces_[i])).split(" "),
-                                                            chunks_tks,
-                                                            tkweight, vtweight)
-            mx = np.max(sim) * 0.99
-            es_logger.info("{} SIM: {}".format(pieces_[i], mx))
-            if mx < 0.63:
-                continue
-            cites[idx[i]] = list(
-                set([str(ii) for ii in range(len(chunk_v)) if sim[ii] > mx]))[:4]
         res = ""
         seted = set([])

         chunks_tks = [huqie.qie(self.qryr.rmWWW(ck)).split(" ")
                       for ck in chunks]
         cites = {}
+        thr = 0.63
+        while len(cites.keys()) == 0 and pieces_ and chunks_tks:
+            for i, a in enumerate(pieces_):
+                sim, tksim, vtsim = self.qryr.hybrid_similarity(ans_v[i],
+                                                                chunk_v,
+                                                                huqie.qie(
+                                                                    self.qryr.rmWWW(pieces_[i])).split(" "),
+                                                                chunks_tks,
+                                                                tkweight, vtweight)
+                mx = np.max(sim) * 0.99
+                es_logger.info("{} SIM: {}".format(pieces_[i], mx))
+                if mx < thr:
+                    continue
+                cites[idx[i]] = list(
+                    set([str(ii) for ii in range(len(chunk_v)) if sim[ii] > mx]))[:4]
+            thr *= 0.8
         res = ""
         seted = set([])