Spaces:
Runtime error
Runtime error
def _precision_recall_at(ranked, gold_positions, gold_count, cutoff):
    """Return ``(precision, recall)`` for the first ``cutoff`` items of ``ranked``."""
    top = ranked[:cutoff]
    num_correct = sum(1 for item in top if item[0] in gold_positions)
    # An empty recommendation list or an empty gold standard yields 0.0
    # instead of raising ZeroDivisionError.
    precision = num_correct / len(top) if top else 0.0
    recall = num_correct / gold_count if gold_count else 0.0
    return precision, recall


def evaluate_pr(system, gold, system_score_cutoff=0, k=5, verbosity=0):
    """
    Return the precision, recall and F1 score of a ranked system output.

    The system output is filtered by ``system_score_cutoff``, ranked by
    descending score, and compared against the gold entries whose score is
    greater than 0. When ``verbosity > 0``, precision and recall are printed
    for every cutoff from 1 to ``k``, followed by the final F1 score.

    Parameters
    ----------
    system : list of tuples
        System output for a sentence in the form (position, score).
    gold : list of tuples
        Gold standard for a sentence in the form (position, score).
    system_score_cutoff : float
        Threshold of importance score for system output, defaults to 0.
        Only entries with a score strictly greater than it are kept.
    k : int
        Top k recommendations to be evaluated on.
    verbosity : int
        Print intermediate and final metrics when greater than 0.

    Returns
    -------
    tuple
        ``(precision, recall, f1)``. The metrics are taken at cutoff ``k``
        unless the gold standard has more than ``k`` positive entries, in
        which case they are taken at cutoff ``len(gold)`` so the system is
        judged on as many recommendations as the gold standard contains.
        Precision is 0.0 when the system recommends nothing, recall is 0.0
        when the gold standard is empty, and F1 is 0.0 when both are 0.
    """
    # Keep only confident recommendations, best score first. A new list is
    # built, so the caller's sequences are never modified.
    ranked = sorted(
        (item for item in system if item[1] > system_score_cutoff),
        key=lambda item: -item[1],
    )
    relevant = [item for item in gold if item[1] > 0]
    gold_positions = {item[0] for item in relevant}
    gold_count = len(relevant)

    # Defaults cover the case where no cutoff is evaluated at all (k < 1).
    precision = recall = 0.0

    # Show how precision and recall change at each cutoff from 1 to k.
    for cutoff in range(1, k + 1):
        precision, recall = _precision_recall_at(
            ranked, gold_positions, gold_count, cutoff
        )
        if verbosity > 0:
            print("k=", cutoff, "\nprecision=", precision, "\nrecall=", recall)

    # When gold holds more than k items, report the metrics at
    # cutoff = len(gold) instead of at k.
    if gold_count > max(k, 0):
        precision, recall = _precision_recall_at(
            ranked, gold_positions, gold_count, gold_count
        )
        if verbosity > 0:
            print("k=", gold_count, "\nprecision=", precision, "\nrecall=", recall)

    denominator = precision + recall
    f_score = 2 * precision * recall / denominator if denominator else 0.0
    if verbosity > 0:
        print("f1 score=", f_score)
    return (precision, recall, f_score)