makcrx committed
Commit: 9647155
1 Parent(s): 3334b06
app.py CHANGED
@@ -5,7 +5,7 @@ import reranking
 from extract_keywords import init_keyword_extractor, extract_keywords
 
 embeddings = SentenceTransformerEmbeddings(model_name="multi-qa-MiniLM-L6-cos-v1")
-db = FAISS.load_local('faiss_qa', embeddings)
+db = FAISS.load_local('faiss_qa_2023-08-09', embeddings)
 init_keyword_extractor()
 
 def main(query):
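
The only functional change in app.py is pointing the app at the dated index directory. A minimal sketch of this load-and-query path, assuming the same LangChain API the repo already uses (the sample query and `k` value are illustrative, not from the repo):

```python
from langchain.vectorstores import FAISS
from langchain.embeddings import SentenceTransformerEmbeddings

# The index must be loaded with the same model it was built with (see test.py).
embeddings = SentenceTransformerEmbeddings(model_name="multi-qa-MiniLM-L6-cos-v1")
db = FAISS.load_local('faiss_qa_2023-08-09', embeddings)

# Each hit carries the answer and articleId that test.py stored as metadata.
for doc, score in db.similarity_search_with_score('яндекс доставка экспресс', k=3):
    print(score, doc.metadata['articleId'], doc.page_content)
```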
faiss_qa_2023-08-09/index.faiss ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7690fb7aa21b8d325e3ce1a9f8fb241dc597aa06df042bf242c522433243b93f
+size 576045
faiss_qa_2023-08-09/index.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09f3705caef79861035ed026855cefb872c386e87938d73a5deb57479507364d
+size 265781
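
Both index files are committed as Git LFS pointers: only the oid (a sha256 of the raw bytes) and the size live in the repository. A quick way to check that a fetched file matches its pointer (a sketch; the path assumes the repo root):

```python
import hashlib

def lfs_sha256(path):
    """Hash a file the way Git LFS records it: sha256 over the raw bytes."""
    h = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):
            h.update(chunk)
    return h.hexdigest()

# Should print the oid from the pointer file above.
print(lfs_sha256('faiss_qa_2023-08-09/index.faiss'))
```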
test.ipynb CHANGED
@@ -9,6 +9,7 @@
    "import sqlite3, json\n",
    "from contextlib import closing\n",
    "\n",
+   "# use test.py to update questions in db!!!\n",
    "def load_questions(sqlite_filename):\n",
    "    all_questions = []\n",
    "    with closing(sqlite3.connect(sqlite_filename)) as db:\n",
@@ -82,30 +83,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
+      "/home/makcrx/anaconda3/lib/python3.10/site-packages/sklearn/feature_extraction/text.py:528: UserWarning: The parameter 'token_pattern' will not be used since 'tokenizer' is not None'\n",
+      "  warnings.warn(\n",
       "/home/makcrx/anaconda3/lib/python3.10/site-packages/sklearn/feature_extraction/text.py:528: UserWarning: The parameter 'token_pattern' will not be used since 'tokenizer' is not None'\n",
       "  warnings.warn(\n"
      ]
     },
     {
-     "data": {
-      "text/plain": [
-       "['почта россия трекинг']"
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['яндекс доставка экспресс']\n",
+      "[]\n"
+     ]
     }
    ],
    "source": [
-    "extract_keywords('пр трекинг')"
+    "print(extract_keywords('яд экспресс'))\n",
+    "print(extract_keywords('яндекс.доставка'))"
    ]
   },
   {
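
The updated cell shows the extractor expanding the abbreviation 'яд' into keywords while returning nothing for the dotted form 'яндекс.доставка'. The same check as a plain script, a sketch assuming the repo's own extract_keywords module (init_keyword_extractor must run first, as app.py does):

```python
from extract_keywords import init_keyword_extractor, extract_keywords

init_keyword_extractor()

# Mirrors the notebook cell: the abbreviation expands, the dotted form does not.
print(extract_keywords('яд экспресс'))      # expected: ['яндекс доставка экспресс']
print(extract_keywords('яндекс.доставка'))  # expected: []
```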
test.py ADDED
@@ -0,0 +1,64 @@
+import sqlite3, json
+from contextlib import closing
+
+# change THIS when rebuilding the index
+output_dir = 'faiss_qa_2023-08-09'
+model_name = "multi-qa-MiniLM-L6-cos-v1"
+
+# Characters in punctuation are deleted; characters in punctuation2 become spaces.
+punctuation = '!"#\'(),:;?[]^`}{'
+punctuation2 = '-/&._~+*=@<>[]\\'
+remove_punctuation = str.maketrans(punctuation2, ' ' * len(punctuation2), punctuation)
+
+def add_special_questions(questions):
+    # Fallback entry: "Call a human/manager" -> "Transferring you to an agent, please wait"
+    questions.append({
+        "question": "Позови человека/менеджера",
+        "query": "Позови человека/менеджера",
+        "answer": "Переключаю на сотрудника, ожидайте",
+        "articleId": 0,
+    })
+
+def load_questions(sqlite_filename):
+    all_questions = []
+    with closing(sqlite3.connect(sqlite_filename)) as db:
+        db.row_factory = sqlite3.Row
+        with closing(db.cursor()) as cursor:
+            results = cursor.execute(
+                "SELECT id, articleId, title, category, section, questions FROM articles WHERE articleType = ? AND (doNotUse IS NULL OR doNotUse = 0)",
+                ('article',)
+            ).fetchall()
+
+            for res in results:
+                questions = json.loads(res['questions'])
+                for q in questions:
+                    # Query text = section + title + question, punctuation-normalized, lowercased.
+                    q['query'] = " ".join(res['section'].split() + res['title'].split() + q['question'].split()).translate(remove_punctuation).lower()
+                    q['articleId'] = res['articleId']
+                all_questions += questions
+
+    add_special_questions(all_questions)
+
+    return all_questions
+
+print("Loading questions from db...")
+questions = load_questions("omnidesk-ai-chatgpt-questions.sqlite")
+
+# print(questions[0])
+
+from langchain.vectorstores import FAISS
+from langchain.docstore.document import Document
+from langchain.embeddings import SentenceTransformerEmbeddings
+
+docs = [
+    Document(page_content=q['query'], metadata={ 'answer': q['answer'], 'articleId': q['articleId'] })
+    for q in questions
+]
+
+print(f"Loading embeddings model {model_name}...")
+embeddings = SentenceTransformerEmbeddings(model_name=model_name)
+
+print("Embedding documents...")
+
+db = FAISS.from_documents(docs, embeddings)
+db.save_local(output_dir)
+
+print('Saved!')
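
The directory written by save_local here is exactly what app.py now loads. A minimal sketch one could append to the end of test.py to sanity-check the build (the sample queries are illustrative; they come from the notebook test above):

```python
# The same normalization applied at build time: punctuation2 chars become
# spaces, punctuation chars are deleted, so the dotted form splits in two.
print("яндекс.доставка".translate(remove_punctuation).lower())  # -> 'яндекс доставка'

# Smoke-test the freshly built index before the app picks it up.
hits = db.similarity_search('яндекс доставка экспресс', k=1)
print(hits[0].metadata['articleId'], hits[0].metadata['answer'])
```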