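# Non-code residue from the page this script was copied from (apparently a
# hosting status banner): "Spaces: Runtime error / Runtime error".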
import json
import sqlite3
from contextlib import closing
# Build configuration.
# Directory the FAISS index is saved to -- change this for every new build.
output_dir = 'faiss_qa_2023-08-20'
# Name of the sentence-embedding model passed to SentenceTransformerEmbeddings.
model_name = "multi-qa-MiniLM-L6-cos-v1"
# Characters that are deleted outright from a query.
punctuation = '!"#\'(),:;?[]^`}{'
# Characters that are replaced by a single space in a query.
punctuation2 = '-/&._~+*=@<>[]\\'
# Translation table for str.translate().  '[' and ']' appear in both sets;
# str.maketrans applies the deletion set last, so they end up deleted rather
# than replaced by a space.
remove_punctuation = str.maketrans(punctuation2, ' ' * len(punctuation2), punctuation)


def load_questions(sqlite_filename):
    """Load every usable question from the ``articles`` table.

    Only rows with ``articleType = 'article'`` whose ``doNotUse`` flag is
    NULL or 0 are read.  The ``questions`` column of each row is parsed as
    JSON and iterated; every item must provide a ``'question'`` key.  Each
    item is augmented in place with:

    * ``'query'``: section, title and question text joined by single spaces,
      punctuation stripped via ``remove_punctuation`` and lower-cased;
    * ``'articleId'``: the ``articleId`` of the row it came from.

    Args:
        sqlite_filename: Path of the SQLite database file to read.

    Returns:
        A flat list of the augmented question dicts from all matching rows.

    Raises:
        sqlite3.Error: If the database or the ``articles`` table cannot be read.
        json.JSONDecodeError: If a ``questions`` value is not valid JSON.
    """
    all_questions = []
    with closing(sqlite3.connect(sqlite_filename)) as db:
        db.row_factory = sqlite3.Row  # allow access to columns by name
        with closing(db.cursor()) as cursor:
            # The OR must be parenthesised: AND binds tighter than OR in SQL,
            # so without the parentheses every row with doNotUse = 0 would be
            # returned regardless of its articleType.
            results = cursor.execute(
                "SELECT id, articleId, title, category, section, questions "
                "FROM articles "
                "WHERE articleType = ? AND (doNotUse IS NULL OR doNotUse = 0)",
                ('article',),
            ).fetchall()
            for res in results:
                # A NULL section/title is treated as empty text instead of
                # crashing on None.lower().
                section = (res['section'] or '').lower()
                title = (res['title'] or '').lower()
                # Articles filed under this section (Russian for "service
                # information") contribute neither section nor title words.
                if section == 'служебная информация':
                    section = ''
                    title = ''
                questions = json.loads(res['questions'])
                for q in questions:
                    words = section.split() + title.split() + q['question'].split()
                    q['query'] = " ".join(words).translate(remove_punctuation).lower()
                    q['articleId'] = res['articleId']
                all_questions += questions
    return all_questions
# --- Build script: load questions, embed them, save a FAISS index ----------
print("Loading questions from db...")
questions = load_questions("omnidesk-ai-chatgpt-questions.sqlite")
# print(questions[0])

# These imports sit after the database load, as in the original script, so
# the "Loading questions" step runs before the langchain packages are loaded.
from langchain.vectorstores import FAISS
from langchain.docstore.document import Document
from langchain.embeddings import SentenceTransformerEmbeddings

# One document per question: the normalised query is the text that gets
# embedded; the answer and source article id travel along as metadata.
docs = [
    Document(page_content=q['query'], metadata={'answer': q['answer'], 'articleId': q['articleId']})
    for q in questions
]

print(f"Loading embeddings model {model_name}...")
embeddings = SentenceTransformerEmbeddings(model_name=model_name)

print("embedding documents...")
db = FAISS.from_documents(docs, embeddings)
db.save_local(output_dir)
print('Saved!')