makcrx committed on 2023-08-20

Commit: 91856d0
Parent: fdc26d9
test.py → embed_qa.py RENAMED
@@ -2,21 +2,13 @@ import sqlite3, json
 from contextlib import closing
 
 # change THIS
-output_dir = 'faiss_qa_2023-08-09'
+output_dir = 'faiss_qa_2023-08-20'
 model_name = "multi-qa-MiniLM-L6-cos-v1"
 
 punctuation = '!"#\'(),:;?[]^`}{'
 punctuation2 = '-/&._~+*=@<>[]\\'
 remove_punctuation = str.maketrans(punctuation2, ' ' * len(punctuation2), punctuation)
 
-def add_special_questions(questions):
-    questions.append({
-        "question": "Позови человека/менеджера",
-        "query": "Позови человека/менеджера",
-        "answer": "Переключаю на сотрудника, ожидайте",
-        "articleId": 0,
-    })
-
 def load_questions(sqlite_filename):
     all_questions = []
     with closing(sqlite3.connect(sqlite_filename)) as db:
@@ -28,15 +20,15 @@ def load_questions(sqlite_filename):
         ).fetchall()
 
         for res in results:
-
+            if res['section'].lower() == 'служебные ответы':
+                res['section'] = ''
+
             questions = json.loads(res['questions'])
             for q in questions:
                 q['query'] = " ".join(res['section'].split() + res['title'].split() + q['question'].split()).translate(remove_punctuation).lower()
                 q['articleId'] = res['articleId']
            all_questions += questions
 
-    add_special_questions(all_questions)
-
    return all_questions
 
 print("Loading questions from db...")
faiss_qa_2023-08-20/index.faiss ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a8a5e3d0342187d57b9a80c431b59019c99e2ce85fdc32666a0578b958efd3b
+size 583725
faiss_qa_2023-08-20/index.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d1da6f468c5ba954ba92921fa207550ba693c009ae1b6ec132d3a911e52d4f5f
+size 267292
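Both added files are Git LFS pointers, not the binary indexes themselves: the oid and size fields identify the real blobs (about 584 KB for index.faiss, 267 KB for index.pkl) held in LFS storage. Assuming the LangChain layout sketched above, reading the committed index back would look roughly like this (a sketch under that assumption; the query string is only an example):

    # Hypothetical loader for the committed index -- assumes the LangChain
    # FAISS layout (index.faiss + index.pkl) sketched above.
    from langchain.embeddings import HuggingFaceEmbeddings
    from langchain.vectorstores import FAISS

    embeddings = HuggingFaceEmbeddings(model_name="multi-qa-MiniLM-L6-cos-v1")
    index = FAISS.load_local('faiss_qa_2023-08-20', embeddings)

    # Example lookup ("how do I track a parcel"); queries should get the same
    # punctuation stripping and lowercasing embed_qa.py applies at build time.
    hits = index.similarity_search_with_score('как отследить посылку', k=3)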
test.ipynb CHANGED
@@ -67,18 +67,10 @@
    "cell_type": "code",
    "execution_count": 1,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2023-08-07 17:36:37.358149: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
-    "from extract_keywords import canonical_keywords, merge_keywords, tokenize_sentence, extract_keywords, init_keyword_extractor\n",
-    "init_keyword_extractor()"
+    "from extract_keywords import normalize_word, canonical_keywords, merge_keywords, tokenize_sentence, extract_keywords, init_keyword_extractor\n",
+    "#init_keyword_extractor()"
    ]
   },
   {
@@ -86,28 +78,16 @@
    "execution_count": 5,
    "metadata": {},
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/makcrx/anaconda3/lib/python3.10/site-packages/sklearn/feature_extraction/text.py:528: UserWarning: The parameter 'token_pattern' will not be used since 'tokenizer' is not None'\n",
-      " warnings.warn(\n",
-      "/home/makcrx/anaconda3/lib/python3.10/site-packages/sklearn/feature_extraction/text.py:528: UserWarning: The parameter 'token_pattern' will not be used since 'tokenizer' is not None'\n",
-      " warnings.warn(\n"
-     ]
-    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "['яндекс доставка экспресс']\n",
-      "[]\n"
+      "['почта', 'россия']\n"
      ]
     }
    ],
    "source": [
-    "print(extract_keywords('яд экспресс'))\n",
-    "print(extract_keywords('яндекс.доставка'))"
+    "print(tokenize_sentence('почты росии'))"
    ]
   },
   {
  {