Spaces:
Sleeping
Sleeping
import json
import os

from jieba.analyse import ChineseAnalyzer
from whoosh.fields import TEXT, SchemaClass, ID
from whoosh.index import create_in
# One jieba-backed analyzer instance is shared by every analyzed text field.
analyzer = ChineseAnalyzer()


def _stored_chinese_text():
    """Return a new stored TEXT field tokenized by the shared analyzer.

    Kept at module level on purpose: attributes placed in a SchemaClass
    body are collected as schema fields, so a helper must live outside it.
    """
    return TEXT(stored=True, analyzer=analyzer)


class ArticleSchema(SchemaClass):
    """Whoosh schema for one indexed passage.

    Every field is stored, so its value can be read back from search hits.
    ``index`` is an ID field; the remaining fields are analyzed text.
    """

    index = ID(stored=True)
    # Field names translate as: 原文 "original text", 注释 "annotation",
    # 批判 "critique", 章节 "chapter".
    原文 = _stored_chinese_text()
    注释 = _stored_chinese_text()
    批判 = _stored_chinese_text()
    章节 = _stored_chinese_text()
schema = ArticleSchema()

# Read and parse the corpus first: if the file is missing or malformed the
# script stops here, before any existing index is replaced and before a
# writer (and its lock) has been opened.
with open("反孔.json", encoding="utf-8") as json_file:
    raw_jsons = json.load(json_file)

# create_in() expects the target directory to exist already; it does not
# create it, so make sure it is there on a fresh checkout.
os.makedirs("indexdir", exist_ok=True)
ix = create_in("indexdir", schema, indexname='article_index')

# Used as a context manager the writer commits when the block completes and
# cancels (releasing the write lock) if adding any document raises.
with ix.writer() as writer:
    for chapter_entry in raw_jsons:
        # Element 0 of each top-level entry is stored as the 章节 value of
        # every passage that follows it in the same entry.
        chapter = chapter_entry[0]
        for passage in chapter_entry[1:]:
            print(passage['index'])
            writer.add_document(
                index=passage['index'],
                原文=passage['原文'],
                # 注释 and 批判 are optional per passage; index them as
                # empty text when the key is absent.
                注释=passage.get('注释', ""),
                批判=passage.get('批判', ""),
                章节=chapter,
            )