import json
import sqlite3
from contextlib import closing

from extract_keywords import extract_keywords

# Characters that are deleted outright when a query string is normalized.
punctuation = '!"#\'(),:;?[]^`}{'
# Characters that are replaced by a space when a query string is normalized.
# NOTE: '[' and ']' are listed here as well as in `punctuation`; in CPython the
# deletion entries of str.maketrans overwrite the replacement entries, so
# brackets end up deleted rather than turned into spaces.
punctuation2 = '-/&._~+*=@<>[]\\'
# Translation table for str.translate(): `punctuation2` -> ' ', `punctuation` -> removed.
remove_punctuation = str.maketrans(punctuation2, ' ' * len(punctuation2), punctuation)


def load_questions(sqlite_filename):
    """Load the questions of all usable articles from an SQLite database.

    Reads the ``articles`` table, keeps rows whose ``articleType`` is
    ``'article'`` and whose ``doNotUse`` flag is NULL or 0, and decodes the
    JSON stored in each row's ``questions`` column. Every decoded question
    dict is extended in place with two keys:

    * ``query`` -- the lower-cased section, title and question text joined
      with single spaces and passed through ``remove_punctuation``;
    * ``articleId`` -- the ``articleId`` of the row the question came from.

    Args:
        sqlite_filename: Path of the SQLite database file to open.

    Returns:
        A flat list with the question dicts of all selected articles.

    Raises:
        sqlite3.Error: If the database cannot be opened or queried.
        json.JSONDecodeError: If a non-NULL ``questions`` value is not valid JSON.
        KeyError: If a decoded question has no ``'question'`` key.
    """
    with closing(sqlite3.connect(sqlite_filename)) as db:
        db.row_factory = sqlite3.Row
        with closing(db.cursor()) as cursor:
            # The doNotUse test is parenthesized on purpose: AND binds tighter
            # than OR in SQL, so without the parentheses every row with
            # doNotUse = 0 would be selected regardless of its articleType.
            rows = cursor.execute(
                "SELECT id, articleId, title, category, section, questions "
                "FROM articles "
                "WHERE articleType = ? AND (doNotUse IS NULL OR doNotUse = 0)",
                ('article',),
            ).fetchall()

    all_questions = []
    for row in rows:
        # NULL section/title columns are treated as empty text.
        section = (row['section'] or '').lower()
        title = (row['title'] or '').lower()
        if section == 'служебная информация':
            # For this section neither the section name nor the title is
            # added to the query text.
            section = ''
            title = ''

        raw_questions = row['questions']
        if raw_questions is None:
            # Article without stored questions: nothing to contribute.
            continue

        # The section/title words are the same for every question of the row.
        prefix_words = section.split() + title.split()
        questions = json.loads(raw_questions)
        for q in questions:
            words = prefix_words + q['question'].split()
            q['query'] = " ".join(words).translate(remove_punctuation).lower()
            q['articleId'] = row['articleId']
        all_questions += questions

    return all_questions


#print("Loading questions from db...")
#questions = load_questions("omnidesk-ai-chatgpt-questions.sqlite")
#for q in questions:
#    keywords = extract_keywords(q['query'])
#    if (len(keywords) == 0):
#        print(q)
#        break