kg_reasoning_demo / utils.py
naveed-stockmark's picture
Upload 3 files
eb30e6c verified
raw
history blame
1.31 kB
import json
def dump_json(file, path):
    """Serialize a JSON-compatible object to ``path`` and report it.

    Args:
        file: The object to serialize (despite the name, this is the data,
            not a file handle).
        path: Destination file path; it is opened for writing as UTF-8.

    The output is pretty-printed with a 4-space indent and non-ASCII
    characters are written verbatim rather than as ``\\uXXXX`` escapes.
    A confirmation line is printed to stdout once the file is closed.
    """
    with open(path, mode='w', encoding='utf-8') as handle:
        json.dump(file, handle, ensure_ascii=False, indent=4)

    message = "Saved json to path: " + str(path)
    print(message)
def load_json(path):
    """Read and parse the JSON document stored at ``path``.

    Args:
        path: Path of the JSON file to read.

    Returns:
        The decoded Python object.

    The file is opened in binary mode, so ``json.load`` takes care of
    decoding the bytes itself. A confirmation line is printed to stdout
    after a successful parse.
    """
    with open(path, mode='rb') as handle:
        parsed = json.load(handle)

    message = "Loaded json from path: " + str(path)
    print(message)
    return parsed
def lead_k_sentences(text, k=50):
    """Select the first k sentences from a Japanese document.

    Sentences are taken to be the non-empty pieces obtained by splitting
    ``text`` on the full-width full stop (U+3002).

    Args:
        text: The document text.
        k: Maximum number of sentences to keep.

    Returns:
        The first ``k`` non-empty sentences, each terminated by the
        delimiter. ``text`` is returned unchanged when it contains no
        delimiter at all, and an empty string is returned when no sentence
        is selected (for example ``k=0`` or a text made only of delimiters).

    Note:
        When the text contains a delimiter and its trailing, unterminated
        fragment falls within the first ``k`` sentences, that fragment is
        also emitted with a delimiter appended.
    """
    DELIMITER = '\u3002'
    if DELIMITER not in text:
        return text

    # Drop empty pieces *before* applying the limit so that leading or
    # consecutive delimiters do not use up the sentence budget.
    segments = [seg for seg in text.split(DELIMITER) if seg][:k]
    if not segments:
        # Nothing selected: do not fabricate a lone delimiter.
        return ''
    return DELIMITER.join(segments) + DELIMITER
import jsonlines
def read_jsonlines(path):
    """Load every record of the JSON Lines file at ``path`` into a list.

    Args:
        path: Path of the JSON Lines file to read.

    Returns:
        A list holding the decoded objects in file order.
    """
    with jsonlines.open(path) as reader:
        return list(reader)
def write_jsonlines(file, path):
    """Write an iterable of objects to ``path`` in JSON Lines format.

    Args:
        file: Iterable of JSON-serializable objects (the data to write,
            not a file handle).
        path: Destination file path; it is opened in write mode.
    """
    with jsonlines.open(path, mode='w') as sink:
        sink.write_all(file)
import re
def normalize_text(s):
    """Normalize a label into an upper-case, underscore-separated key.

    Steps, in order:
      1. Convert the input to ``str``.
      2. Remove parenthesized text written with full-width (Japanese)
         brackets, U+FF08 ... U+FF09.
      3. Remove parenthesized text written with ASCII brackets ``( ... )``.
      4. Strip surrounding whitespace.
      5. Replace each space with an underscore and upper-case the result.

    Args:
        s: Any value; it is stringified before processing.

    Returns:
        The normalized string.

    Note:
        Both bracket patterns are greedy, so everything from the first
        opening bracket to the last matching closing bracket on the same
        line is removed.
    """
    s = str(s)
    # Full-width brackets are spelled with explicit escapes so they cannot be
    # confused with (or silently converted to) their ASCII look-alikes.
    s = re.sub(r'\uff08.*\uff09', '', s)  # Remove Japanese (full-width) brackets
    s = re.sub(r'\(.*\)', '', s)  # Remove English (ASCII) brackets
    s = s.strip()
    s = s.replace(' ', '_')
    s = s.upper()
    return s