t5-nynorsk-norbench / corpus /calucalate_f1.py
pere's picture
calculations
645191f
raw
history blame
927 Bytes
import json
import sys
from sklearn.metrics import accuracy_score, f1_score
def load_labels(filename):
    """Read integer targets and predictions from a JSON Lines file.

    Every non-blank line must hold a JSON object with a 'target' and a
    'prediction' key whose values can be converted with ``int()``.
    Blank or whitespace-only lines are skipped.

    Args:
        filename: Path of the JSON Lines file to read.

    Returns:
        A ``(targets, predictions)`` tuple of two equally long lists of int.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks 'target' or 'prediction'.
        ValueError: If a value cannot be converted to int.
    """
    targets = []
    predictions = []
    # Decode explicitly as UTF-8 instead of the platform default encoding,
    # so records containing non-ASCII text read the same on every system.
    with open(filename, 'r', encoding='utf-8') as f:
        for line in f:
            # A trailing or stray empty line is not a record; json.loads
            # would raise on it.
            if not line.strip():
                continue
            obj = json.loads(line)
            targets.append(int(obj['target']))
            predictions.append(int(obj['prediction']))
    return targets, predictions


def main(argv=None):
    """Print accuracy, macro F1 and micro F1 for a JSON Lines file.

    Args:
        argv: Argument vector in the shape of ``sys.argv`` (program name
            followed by exactly one file path). Defaults to ``sys.argv``.

    Exits with status 1 when the argument count is wrong or when the file
    contains no records.
    """
    if argv is None:
        argv = sys.argv

    # Check command-line arguments
    if len(argv) != 2:
        print("Usage: python script.py <jsonl_file>")
        sys.exit(1)

    filename = argv[1]
    targets, predictions = load_labels(filename)

    # Fail with a clear message rather than handing empty lists to the
    # metric functions.
    if not targets:
        print(f"No records found in {filename}", file=sys.stderr)
        sys.exit(1)

    # Calculate metrics
    accuracy = accuracy_score(targets, predictions)
    f1_macro = f1_score(targets, predictions, average='macro')
    f1_micro = f1_score(targets, predictions, average='micro')

    # Print results
    print(f"Accuracy: {accuracy}")
    print(f"F1 Macro: {f1_macro}")
    print(f"F1 Micro: {f1_micro}")


if __name__ == "__main__":
    main()