# davidkim205's picture
# Upload folder using huggingface_hub
# 577164e verified
from nltk.translate.bleu_score import sentence_bleu,SmoothingFunction
from nltk.tokenize import word_tokenize
from utils.tokenizer import tokenize
import re
def is_korean(text):
    """Report whether ``text`` contains at least one Hangul syllable.

    A character counts only if it lies between '가' (U+AC00) and
    '힣' (U+D7A3) inclusive, i.e. the precomposed Hangul syllables.
    Anything outside that range is ignored.

    Args:
        text: The string to inspect.

    Returns:
        True as soon as one matching character is found; False when
        none is found (including for an empty string).
    """
    return any('가' <= symbol <= '힣' for symbol in text)
def simple_score(text1, text2):
    """Score ``text2`` (candidate) against ``text1`` (reference) with BLEU.

    Newlines in both texts are replaced by spaces before tokenizing.
    The tokenizer is selected from the reference alone: if it contains
    Korean characters (per ``is_korean``), both texts go through the
    project's ``tokenize`` helper unchanged; otherwise both are
    lower-cased and split with NLTK's ``word_tokenize``.

    Args:
        text1: Reference text.
        text2: Candidate text to be scored.

    Returns:
        The value produced by NLTK's ``sentence_bleu`` for the single
        reference, using ``SmoothingFunction().method2`` for smoothing.
    """
    flat_ref = text1.replace("\n", " ")
    flat_cand = text2.replace("\n", " ")

    # The reference decides which tokenizer is applied to both texts.
    if is_korean(flat_ref):
        ref_tokens = tokenize(flat_ref)
        cand_tokens = tokenize(flat_cand)
    else:
        ref_tokens = word_tokenize(flat_ref.lower())
        cand_tokens = word_tokenize(flat_cand.lower())

    smoother = SmoothingFunction().method2
    return sentence_bleu([ref_tokens], cand_tokens, smoothing_function=smoother)
if __name__ == "__main__":
    # Interactive check: repeatedly read a reference/candidate pair and
    # print its score.
    #
    # simple_score() takes exactly two arguments and chooses the tokenizer
    # itself by inspecting the reference text, so no language is prompted
    # for or passed along (passing a third argument raised TypeError).
    try:
        while True:
            ref = input("ref: ")
            cand = input("cand: ")
            print('score', simple_score(ref, cand))
    except (EOFError, KeyboardInterrupt):
        # End of input or Ctrl-C ends the session without a traceback.
        print()