Nihal D'Souza
Final app release
e41b03f
def _precision_recall_at(ranked, gold_positions, cutoff):
    """Return ``(precision, recall)`` for the top ``cutoff`` entries of ``ranked``.

    Precision is measured over the recommendations actually available (at
    most ``cutoff``); recall is measured over all gold positions. Either
    value is 0.0 when its denominator would be zero.
    """
    top = ranked[:cutoff]
    hits = sum(1 for item in top if item[0] in gold_positions)
    precision = hits / len(top) if top else 0.0
    recall = hits / len(gold_positions) if gold_positions else 0.0
    return precision, recall


def evaluate_pr(system, gold, system_score_cutoff=0, k=5, verbosity=0):
    """
    Return the precision, recall and f1 score of the system recommendations.

    The scores are computed at cutoff ``k``. When the gold standard contains
    more than ``k`` positively scored sentences, they are computed at
    ``n = len(gold)`` instead, so the system is judged on recommending the
    same number of sentences as the gold standard.

    When ``verbosity > 0`` the precision and recall at every cutoff from 1
    to ``k`` (and at ``n`` when it applies) are printed, followed by the
    final f1 score.

    Parameters
    ----------
    system : list of tuples
        System output for sentence in form (position, score).
    gold : list of tuple
        Gold standard for sentence in form (position, score). Only entries
        with a score greater than 0 are treated as gold.
    system_score_cutoff : float
        Threshold of importance score for system output, default to 0.
        Only entries with a score strictly greater than it are kept.
    k : int
        Top k recommendations to be evaluated on.
    verbosity : int
        Print intermediate and final scores when greater than 0.

    Returns
    -------
    tuple
        A tuple containing precision, recall and f1 score for the system.
        All three are 0.0 when nothing can be scored (no system output
        above the cutoff, no gold sentences, or no overlap).
    """
    # Keep only confident recommendations, best score first. Building a new
    # list leaves the caller's `system` untouched; the sort is stable, so
    # ties keep their input order.
    ranked = sorted(
        (item for item in system if item[1] > system_score_cutoff),
        key=lambda item: -item[1],
    )
    gold_positions = [item[0] for item in gold if item[1] > 0]

    # n is the extra cutoff used for the returned scores when gold is larger
    # than k; 0 means "report at k".
    n = len(gold_positions) if len(gold_positions) > k else 0

    # Defaults cover k < 1, where the loop below never runs.
    precision = recall = 0.0

    # Show how precision and recall change at different k.
    for cutoff in range(1, k + 1):
        precision, recall = _precision_recall_at(ranked, gold_positions, cutoff)
        if verbosity > 0:
            print("k=", cutoff, "\nprecision=", precision, "\nrecall=", recall)

    if n:
        precision, recall = _precision_recall_at(ranked, gold_positions, n)
        if verbosity > 0:
            print("k=", n, "\nprecision=", precision, "\nrecall=", recall)

    # F1 is defined as 0 when both precision and recall are 0.
    denominator = precision + recall
    f_score = 2 * precision * recall / denominator if denominator else 0.0
    if verbosity > 0:
        print("f1 score=", f_score)
    return (precision, recall, f_score)