Spaces:
Running
Running
File size: 1,550 Bytes
960cd20 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
import ast
import json
import logging
import os.path
import jieba
import pypinyin
from pypinyin_dict.phrase_pinyin_data import large_pinyin
from pypinyin_dict.pinyin_data import cc_cedict
from contants import config
# Hand-maintained phrase -> pinyin overrides.
# Each value lists the syllables in order: one single-item list holding the
# tone-marked pinyin for each character of the key.
# load_phrases_from_file() merges extra entries into this mapping, and
# phrases_dict_init() adds every key to jieba and passes the whole mapping to
# pypinyin.load_phrases_dict().
phrases_dict = {
    "一骑当千": [["yí"], ["jì"], ["dāng"], ["qiān"]],
    "桔子": [["jú"], ["zi"]],
    "重生": [["chóng"], ["shēng"]],
    "重重地": [["zhòng"], ["zhòng"], ["de"]],
    "自少时": [["zì"], ["shào"], ["shí"]],
}
def load_phrases_from_file(file_path):
    """Merge user-defined phrase pinyin overrides from a file into phrases_dict.

    The file must contain a single Python dict literal in the same shape as
    ``phrases_dict`` (phrase -> list of single-item pinyin lists). It is parsed
    with ``ast.literal_eval``, so only literals are accepted and no code in the
    file is executed.

    If the file does not exist, a default template is written to ``file_path``
    (creating the parent directory when needed) and the template's entries are
    merged immediately, so the first run behaves like every later run.

    This function is best-effort: read, parse, and write failures are logged
    and never raised to the caller.

    Args:
        file_path: Path of the phrases file to read (and to create if missing).

    Returns:
        None. ``phrases_dict`` is updated in place on success.
    """
    default_phrases_text = """{
"一骑当千": [["yí"], ["jì"], ["dāng"], ["qiān"]],
"藏起": [["cáng"], ["qǐ"]],
}"""

    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            phrases_text = file.read()
    except FileNotFoundError:
        logging.debug(f"File {file_path} not found. Create phrases_dict to {file_path}.")
        # Fall back to the template so its entries apply on this run as well.
        phrases_text = default_phrases_text
        try:
            parent_dir = os.path.dirname(file_path)
            if parent_dir:
                # A missing parent directory would otherwise make the write
                # below fail with another FileNotFoundError.
                os.makedirs(parent_dir, exist_ok=True)
            with open(file_path, 'w', encoding='utf-8') as file:
                file.write(default_phrases_text)
        except OSError as e:
            # Errors raised inside an except handler are not caught by the
            # sibling handlers of the same try, so handle them here.
            logging.error(f"Error creating default phrases file {file_path}: {str(e)}")
    except Exception as e:
        logging.error(f"Error loading additional phrases from {file_path}: {str(e)}")
        return

    try:
        additional_phrases = ast.literal_eval(phrases_text)
        if not isinstance(additional_phrases, dict):
            # dict.update() would accept some non-dict iterables and silently
            # merge garbage (e.g. ["ab"] becomes {"a": "b"}).
            raise TypeError(
                f"expected a dict literal, got {type(additional_phrases).__name__}"
            )
        phrases_dict.update(additional_phrases)
        logging.info(f"Additional phrases loaded from {file_path}")
    except Exception as e:
        logging.error(f"Error loading additional phrases from {file_path}: {str(e)}")
def phrases_dict_init():
    """Initialise the pinyin phrase overrides.

    In order: calls ``large_pinyin.load()``, merges any extra phrases from
    ``phrases_dict.txt`` under the configured data path into ``phrases_dict``,
    adds every phrase key to jieba, and finally passes the whole mapping to
    ``pypinyin.load_phrases_dict``.
    """
    logging.info("Loading phrases_dict")
    large_pinyin.load()

    # Extra phrases are read from phrases_dict.txt under
    # <config.abs_path>/<config.system.data_path>.
    load_phrases_from_file(
        os.path.join(config.abs_path, config.system.data_path, "phrases_dict.txt")
    )

    # Iterating a dict yields its keys, i.e. the phrases themselves.
    for phrase in phrases_dict:
        jieba.add_word(phrase)

    pypinyin.load_phrases_dict(phrases_dict)
|