Spaces:
Runtime error
Runtime error
import json | |
def dump_json(file, path):
    """Serialize ``file`` as JSON into ``path`` and report the destination.

    The file is written as UTF-8 text, indented by four spaces, with
    non-ASCII characters kept as-is rather than escaped.
    """
    with open(path, 'w', encoding='utf-8') as out_stream:
        json.dump(file, out_stream, ensure_ascii=False, indent=4)
    print(f"Saved json to path: {path!s}")
def load_json(path):
    """Parse the JSON document stored at ``path`` and return the result.

    The file is opened in binary mode and handed straight to ``json.load``;
    a short confirmation message is printed once parsing has succeeded.
    """
    with open(path, 'rb') as in_stream:
        parsed = json.load(in_stream)
    print(f"Loaded json from path: {path!s}")
    return parsed
def lead_k_sentences(text, k=50):
    """Return the first ``k`` sentences of a Japanese document.

    Sentences are delimited by the ideographic full stop '。'. Empty segments
    (produced by leading, trailing or consecutive delimiters) are discarded
    *before* the first ``k`` are taken, so ``k`` counts real sentences rather
    than raw split pieces.

    Args:
        text: Document text.
        k: Maximum number of sentences to keep.

    Returns:
        ``text`` unchanged when it contains no '。'. Otherwise the selected
        sentences joined by '。' and terminated with '。', or an empty string
        when no non-empty sentence is selected (e.g. ``k == 0`` or a text made
        only of delimiters).
    """
    DELIMITER = '。'
    if DELIMITER not in text:
        return text

    # Drop empty pieces first; slicing first would let stray delimiters use up
    # the sentence budget.
    sentences = [seg for seg in text.split(DELIMITER) if seg][:k]
    if not sentences:
        # Nothing selected: do not fabricate a lone delimiter.
        return ''

    # A trailing fragment without its own terminator is treated as a sentence
    # and gets one appended.
    return DELIMITER.join(sentences) + DELIMITER
import jsonlines | |
def read_jsonlines(path):
    """Read every object from the JSON Lines file at ``path`` into a list."""
    with jsonlines.open(path) as source:
        records = list(source)
    return records
def write_jsonlines(file, path):
    """Write each object of the iterable ``file`` to ``path`` as JSON Lines."""
    with jsonlines.open(path, mode='w') as sink:
        sink.write_all(file)
import re | |
def normalize_text(s):
    """Normalize a label into an upper-case, underscore-separated key.

    Steps, in order: convert to ``str``; remove parenthesised spans written
    with full-width (Japanese) brackets ``（…）``; remove parenthesised spans
    written with ASCII brackets ``(...)``; strip surrounding whitespace;
    replace each remaining space with ``_``; upper-case the result.

    Args:
        s: Any value; it is passed through ``str()`` first.

    Returns:
        The normalized string.
    """
    s = str(s)
    # NOTE: both patterns are greedy, so everything from the first opening
    # bracket to the last matching closing bracket on a line is removed.
    s = re.sub(r'（.*）', '', s)  # Remove full-width (Japanese) brackets
    s = re.sub(r'\(.*\)', '', s)  # Remove ASCII (English) brackets
    s = s.strip()
    s = s.replace(' ', '_')
    s = s.upper()
    return s