File size: 1,550 Bytes
960cd20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import ast
import json
import logging
import os.path

import jieba
import pypinyin
from pypinyin_dict.phrase_pinyin_data import large_pinyin
from pypinyin_dict.pinyin_data import cc_cedict

from contants import config

# Built-in pronunciation overrides, keyed by phrase. Each value holds one
# single-element list per character (e.g. [["jú"], ["zi"]]); the readings are
# written here as space-separated syllables and expanded into that shape.
phrases_dict = {
    phrase: [[syllable] for syllable in reading.split()]
    for phrase, reading in {
        "一骑当千": "yí jì dāng qiān",
        "桔子": "jú zi",
        "重生": "chóng shēng",
        "重重地": "zhòng zhòng de",
        "自少时": "zì shào shí",
    }.items()
}


def load_phrases_from_file(file_path):
    """Merge extra phrase pronunciations from *file_path* into ``phrases_dict``.

    The file is expected to contain a single Python dict literal mapping a
    phrase to a list of per-character pinyin lists, for example
    ``{"藏起": [["cáng"], ["qǐ"]]}``. It is parsed with ``ast.literal_eval``,
    so only literals are accepted and no code from the file is executed.

    If the file does not exist, a template file is written to *file_path*
    (creating missing parent directories) and nothing is merged on this call.

    Args:
        file_path: Path of the UTF-8 text file holding the dict literal.

    Returns:
        None. Every failure (unreadable file, invalid literal, non-dict
        content, template that cannot be created) is logged, never raised.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            additional_phrases = ast.literal_eval(file.read())
        # Reject anything that is not a mapping: dict.update() would accept
        # some iterables of pairs and could merge garbage or stop half-way.
        if not isinstance(additional_phrases, dict):
            raise TypeError(
                f"expected a dict literal, got {type(additional_phrases).__name__}"
            )
        phrases_dict.update(additional_phrases)
        logging.info(f"Additional phrases loaded from {file_path}")
    except FileNotFoundError:
        logging.debug(f"File {file_path} not found. Create phrases_dict to {file_path}.")
        # Writing the template happens inside an exception handler, so a
        # failure here would escape the sibling ``except Exception`` clause;
        # guard it separately to keep this function best-effort.
        try:
            parent_dir = os.path.dirname(file_path)
            if parent_dir:
                # The data directory may not exist yet on a fresh install.
                os.makedirs(parent_dir, exist_ok=True)
            with open(file_path, 'w', encoding='utf-8') as file:
                file.write("""{
"一骑当千": [["yí"], ["jì"], ["dāng"], ["qiān"]],
"藏起": [["cáng"], ["qǐ"]],
}""")
        except OSError as e:
            logging.error(f"Error creating phrases file {file_path}: {str(e)}")
    except Exception as e:
        logging.error(f"Error loading additional phrases from {file_path}: {str(e)}")


def phrases_dict_init():
    """Load all phrase pronunciation data and register it with jieba and pypinyin.

    Steps, in order: call ``large_pinyin.load()``, merge the user file
    ``phrases_dict.txt`` (located under ``config.abs_path`` /
    ``config.system.data_path``) into ``phrases_dict``, add every phrase key
    to jieba via ``jieba.add_word``, and finally pass the merged mapping to
    ``pypinyin.load_phrases_dict``.
    """
    logging.info("Loading phrases_dict")
    large_pinyin.load()

    user_phrases_path = os.path.join(
        config.abs_path,
        config.system.data_path,
        "phrases_dict.txt",
    )
    load_phrases_from_file(user_phrases_path)

    # Register each phrase as a jieba word before handing the pronunciations
    # to pypinyin.
    for phrase in phrases_dict:
        jieba.add_word(phrase)
    pypinyin.load_phrases_dict(phrases_dict)