# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

# -*- coding:utf-8 -*-
import argparse
from .bleu import corpus_bleu
from .weighted_ngram_match import corpus_weighted_ngram_match
from .syntax_match import corpus_syntax_match
from .dataflow_match import corpus_dataflow_match
import os


def calculate(predictions, references, language="python", alpha=0.25, beta=0.25, gamma=0.25, theta=0.25):
    """Compute the combined score and its four component scores.

    Args:
        predictions: List of predicted code strings, one per instance.
        references: List of reference sets. Each reference set is a list of
            code strings with exactly one entry per prediction, i.e.
            ``references[j][i]`` is the j-th reference for ``predictions[i]``.
        language: Language name. Selects ``keywords/<language>.txt`` next to
            this module and is forwarded to the syntax and dataflow matchers.
        alpha: Weight of the n-gram match (BLEU) score.
        beta: Weight of the keyword-weighted n-gram match score.
        gamma: Weight of the syntax match score.
        theta: Weight of the dataflow match score.

    Returns:
        A dict with the keys ``ngram_match_score``,
        ``weighted_ngram_match_score``, ``syntax_match_score``,
        ``dataflow_match_score`` and ``code_bleu_score`` (the weighted sum of
        the other four).

    Raises:
        AssertionError: If a reference set does not contain exactly one
            entry per prediction.
        OSError: If the keywords file for ``language`` cannot be opened.
    """
    # Preprocess inputs: strip surrounding whitespace everywhere.
    pre_references = [[s.strip() for s in ref_set] for ref_set in references]
    hypothesis = [s.strip() for s in predictions]

    for ref_set in pre_references:
        assert len(hypothesis) == len(ref_set), (
            "each reference set must contain one entry per prediction"
        )

    # Transpose from "per reference set" to "per instance":
    # references[i] is the list of all references for hypothesis[i].
    # NOTE: the previous check `len(references) == len(pre_references) *
    # len(hypothesis)` was wrong (it failed for more than one reference set);
    # by construction len(references) == len(hypothesis), so no check is needed.
    references = [
        [ref_set[i] for ref_set in pre_references]
        for i in range(len(hypothesis))
    ]

    # Calculate ngram match (BLEU) on whitespace-separated tokens.
    tokenized_hyps = [x.split() for x in hypothesis]
    tokenized_refs = [[x.split() for x in reference] for reference in references]

    ngram_match_score = corpus_bleu(tokenized_refs, tokenized_hyps)

    # Calculate weighted ngram match: language keywords weigh 1, other tokens 0.2.
    curr_path = os.path.dirname(os.path.abspath(__file__))
    keywords_path = os.path.join(curr_path, "keywords", language + ".txt")
    with open(keywords_path, "r", encoding="utf-8") as keywords_file:
        # A set gives O(1) membership tests in make_weights.
        keywords = {line.strip() for line in keywords_file}

    def make_weights(reference_tokens, key_word_list):
        # Map every token of one reference to its n-gram weight.
        return {
            token: 1 if token in key_word_list else 0.2
            for token in reference_tokens
        }

    tokenized_refs_with_weights = [
        [
            [reference_tokens, make_weights(reference_tokens, keywords)]
            for reference_tokens in reference
        ]
        for reference in tokenized_refs
    ]

    weighted_ngram_match_score = corpus_weighted_ngram_match(
        tokenized_refs_with_weights, tokenized_hyps
    )

    # Calculate syntax match (operates on the untokenized strings).
    syntax_match_score = corpus_syntax_match(references, hypothesis, language)

    # Calculate dataflow match (operates on the untokenized strings).
    dataflow_match_score = corpus_dataflow_match(references, hypothesis, language)

    code_bleu_score = (
        alpha * ngram_match_score
        + beta * weighted_ngram_match_score
        + gamma * syntax_match_score
        + theta * dataflow_match_score
    )

    return {
        "ngram_match_score": ngram_match_score,
        "weighted_ngram_match_score": weighted_ngram_match_score,
        "syntax_match_score": syntax_match_score,
        "dataflow_match_score": dataflow_match_score,
        "code_bleu_score": code_bleu_score
    }