File size: 946 Bytes
577164e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from nltk.translate.bleu_score import sentence_bleu,SmoothingFunction
from nltk.tokenize import word_tokenize
from utils.tokenizer import tokenize
import re

def is_korean(text):
    """Return True if *text* contains at least one precomposed Hangul syllable.

    Only characters in the inclusive range '가'..'힣' count; an empty
    string yields False.
    """
    first_syllable, last_syllable = '가', '힣'
    return any(first_syllable <= ch <= last_syllable for ch in text)

def simple_score(text1, text2):
    """Score *text2* (candidate) against *text1* (reference) with sentence BLEU.

    Newlines in both texts are replaced by spaces before tokenizing. The
    tokenizer is picked from the reference text alone: if it contains Hangul
    syllables, both texts go through the project's ``tokenize``; otherwise
    both are lower-cased and split with NLTK's ``word_tokenize``.

    Returns the value of ``sentence_bleu`` computed with NLTK's
    ``SmoothingFunction().method2`` smoothing.
    """
    ref_text, cand_text = (re.sub("\n", " ", raw) for raw in (text1, text2))

    if is_korean(ref_text):
        # Hangul detected in the reference: use the project tokenizer as-is.
        to_tokens = tokenize
    else:
        def to_tokens(sentence):
            return word_tokenize(sentence.lower())

    ref_tokens = to_tokens(ref_text)
    cand_tokens = to_tokens(cand_text)

    # sentence_bleu expects a list of references, hence the wrapping list.
    return sentence_bleu(
        [ref_tokens],
        cand_tokens,
        smoothing_function=SmoothingFunction().method2,
    )


if __name__ == "__main__":
    # Interactive loop: read a reference and a candidate, print their score.
    # simple_score() takes exactly two arguments and picks the tokenizer from
    # the reference text itself, so no language is asked for or passed here
    # (passing a third argument raised TypeError on the first scoring attempt).
    try:
        while True:
            ref = input("ref: ")
            cand = input("cand: ")
            print('score', simple_score(ref, cand))
    except (EOFError, KeyboardInterrupt):
        # End of input or Ctrl-C: finish the prompt line and exit quietly
        # instead of dumping a traceback.
        print()