Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	| # -*- coding:utf-8 -*- | |
| """Collection of statistics functions. | |
| """ | |
from collections import Counter

import numpy as np
def percentage_agreement_pov(total_pov: int, total_annotations: int) -> float:
    """Compute the share of agree/disagree annotations as a percentage.

    :param total_pov: total agree/disagree annotations
    :type total_pov: int
    :param total_annotations: total annotations in project
    :type total_annotations: int
    :rtype: float
    :return: agreement percentage rounded to two decimals; ``0.0`` when
        ``total_annotations`` is zero
    """
    # A project without annotations has nothing to agree on: report 0 %
    # instead of raising ZeroDivisionError.
    if total_annotations == 0:
        return 0.0
    return round((total_pov / total_annotations) * 100, 2)
def fleiss_kappa_function(matrix: list) -> float:
    """Compute Fleiss' kappa for a group of annotators.

    :param matrix: a matrix of shape (:attr:'N', :attr:'k') with
        'N' = number of subjects and 'k' = the number of categories.
        'M[i, j]' represents the number of raters who assigned
        the 'i'th subject to the 'j'th category. Nested lists, numpy
        arrays and numpy matrices are accepted. The number of raters is
        read from the first row, so every row is expected to sum to the
        same value.
    :type matrix: list or numpy array/matrix
    :rtype: float
    :return: Fleiss' kappa score rounded to four decimals, or NaN when the
        statistic is undefined (no subjects, fewer than two raters, or
        chance agreement equal to 1 because a single category received
        every rating)
    :raises ValueError: if ``matrix`` is not two-dimensional
    """
    # np.asarray turns lists and np.matrix into a plain ndarray, so that `*`
    # below is element-wise (on np.matrix it would be a matrix product).
    ratings = np.asarray(matrix, dtype=float)
    if ratings.ndim != 2:
        raise ValueError(
            "matrix must be two-dimensional (subjects x categories)"
        )

    n_items = ratings.shape[0]  # number of subjects
    if ratings.size == 0:
        return float("nan")

    n_annotators = float(np.sum(ratings[0, :]))  # number of raters per subject
    if n_annotators < 2:
        # Agreement between raters needs at least two of them; the formula
        # below would divide by zero.
        return float("nan")

    tot_annotations = n_items * n_annotators  # total number of annotations
    category_sum = np.sum(ratings, axis=0)  # ratings per category over all subjects

    # chance agreement: squared share of each category, summed over categories
    p = category_sum / tot_annotations
    chance = float(np.sum(p * p))

    # observed agreement: per-subject agreement averaged over all subjects
    per_item = (np.sum(ratings * ratings, axis=1) - n_annotators) / (
        n_annotators * (n_annotators - 1)
    )
    observed = float(np.sum(per_item) / n_items)

    denominator = 1.0 - chance
    if denominator == 0.0:
        # Every rating fell into one category: kappa is 0/0, i.e. undefined.
        return float("nan")
    return round((observed - chance) / denominator, 4)
def cohen_kappa_function(ann1: list, ann2: list) -> float:
    """Compute Cohen's kappa for a pair of annotators.

    The two lists are compared position by position. If their lengths
    differ, only the overlapping prefix is compared, while the observed
    agreement is still divided by ``len(ann1)``.

    :param ann1: annotations provided by first annotator
    :type ann1: list
    :param ann2: annotations provided by second annotator
    :type ann2: list
    :rtype: float
    :return: Cohen kappa statistic rounded to four decimals, or NaN when
        the statistic is undefined (an empty annotation list, or expected
        agreement equal to 1 because both annotators used one single,
        identical label)
    """
    # Kappa is undefined without annotations from both raters; return NaN
    # instead of dividing by a zero length.
    if len(ann1) == 0 or len(ann2) == 0:
        return float("nan")

    # observed agreement A (Po)
    matches = sum(1 for an1, an2 in zip(ann1, ann2) if an1 == an2)
    observed = matches / len(ann1)

    # expected agreement E (Pe): a label used by only one annotator
    # contributes zero, so iterating over the shared labels is sufficient.
    freq1 = Counter(ann1)
    freq2 = Counter(ann2)
    expected = sum(
        (freq1[label] / len(ann1)) * (freq2[label] / len(ann2))
        for label in freq1.keys() & freq2.keys()
    )

    denominator = 1 - expected
    if denominator == 0:
        # Both annotators used the same single label everywhere: 0/0.
        return float("nan")
    return round((observed - expected) / denominator, 4)
