#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from collections import defaultdict, Counter
from itertools import combinations

import numpy as np
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
import streamlit as st

from n4a_analytics_lib.analytics import GlobalStatistics, IaaStatistics
from n4a_analytics_lib.metrics_utils import (interpret_kappa,
                                             fleiss_kappa_function,
                                             cohen_kappa_function)

TITLE = "NER4ARCHIVES Analytics"

# Set application page configuration
st.set_page_config(layout="wide")

# Sidebar: metadata, inputs, etc.
sidebar = st.sidebar

# Columns: display results
col1, col2 = st.columns(2)

# Description
sidebar.markdown(f"""
# 📏 {TITLE}

A basic web application that displays a dashboard for analyzing an INCEpTION
annotation project, built in the context of NER4Archives (Inria / Archives nationales).

This tool provides two statistics levels:

- *Global project statistics*: analyze named entities across all curated documents in the project;
- *Inter-Annotator Agreement results*: analyze the results of an IAA experiment.
""")

# Statistics level to analyze
option = sidebar.selectbox('Which statistics level?',
                           ('Inter-Annotator Agreement results', 'Global project statistics'))

# IAA results view
if option == "Inter-Annotator Agreement results":
    annotations = sidebar.file_uploader("Upload IAA annotations (.zip format only): ")
    baseline_text = sidebar.file_uploader("Upload baseline text (.txt format only): ")

    if baseline_text is not None and annotations is not None:
        project_analyzed = IaaStatistics(zip_project=annotations, baseline_text=baseline_text.getvalue())
        baseline_analyzer = project_analyzed.analyze_text()

        col2.markdown(f"""
### BASELINE TEXT: {baseline_text.name}

- sentences: {baseline_analyzer[0]}
- words: {baseline_analyzer[1]}
- characters: {baseline_analyzer[2]}
""")

        # Mentions annotated by at least one coder, deduplicated while preserving order
        commune_mentions = [mention for mentions in project_analyzed.mentions_per_coder.values() for mention in mentions]
        commune_mentions = list(dict.fromkeys(commune_mentions))

        # Count, for each coder, how many annotations carry each label
        dicts_coders = []
        for coder, coder_annotations in project_analyzed.annotations_per_coders.items():
            nombre_annotations = list(coder_annotations.values())
            # print(f"Total number of annotations: {len(nombre_annotations)}")
            dicts_coders.append(dict(Counter(nombre_annotations)))

        labels = list(dicts_coders[0])  # labels observed by the first coder

        # One row per mention, one column per annotator; 'None' when a coder skipped a mention
        df = pd.DataFrame(project_analyzed.annotations_per_coders, index=commune_mentions)
        for ann in project_analyzed.annotators:
            df[ann] = 'None'
            for mention, value in project_analyzed.annotations_per_coders[ann].items():
                df.loc[mention, ann] = value

        total_annotations = len(df)
        # print(f'* Total annotations: {total_annotations}')

        # Items x categories count matrix for Fleiss' kappa: for each mention,
        # how many coders assigned each label (including 'None')
        df_n = df.apply(pd.Series.value_counts, axis=1).fillna(0).astype(int)
        matrix = df_n.values
        pairs = list(combinations(project_analyzed.annotations_per_coders, 2))

        # Display in app
        cont_kappa = st.container()
        cont_kappa.title("Inter-Annotator Agreement (IAA) results")
        tab1, tab2, tab3, tab4, tab5 = cont_kappa.tabs(
            ["📈 IAA metrics", "🗃 IAA Metrics Legend", "✔️ Agree annotations",
             "❌ Disagree annotations", "🏷️ Global Labels Statistics"])

        tab1.subheader("Fleiss Kappa (global score for group):")
        tab1.markdown(interpret_kappa(round(fleiss_kappa_function(matrix), 2)), unsafe_allow_html=True)

        tab1.subheader("Cohen Kappa Annotators Matrix (score between annotators):")
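        # Illustrative sketch (assumption, not the library's documented API): the pairwise
        # scores below are computed by handing cohen_kappa_function() the two coders' label
        # sequences aligned on the same mentions, e.g. (hypothetical coder names and labels):
        #   labels_per_coder["coder_A"] -> ["PER", "LOC", "None", ...]
        #   labels_per_coder["coder_B"] -> ["PER", "ORG", "None", ...]
        # Each call is expected to return a single kappa score for that pair, which
        # interpret_kappa() renders as an HTML snippet (hence unsafe_allow_html=True).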
        # tab1.dataframe(df)
        data = []
        for coder_1, coder_2 in pairs:
            cohen_score = cohen_kappa_function(project_analyzed.labels_per_coder[coder_1],
                                               project_analyzed.labels_per_coder[coder_2])
            data.append(((coder_1, coder_2), cohen_score))
            tab1.markdown(f"* {coder_1} <> {coder_2} : {interpret_kappa(cohen_score)}", unsafe_allow_html=True)

        # Arrange the pairwise Cohen kappa scores as a coder x coder matrix
        intermediary = defaultdict(Counter)
        for (src, tgt), score in data:
            intermediary[src][tgt] = score

        letters = sorted({key for inner in intermediary.values() for key in inner} | set(intermediary.keys()))
        confusion_matrix = [[intermediary[src][tgt] for tgt in letters] for src in letters]

        df_cm = pd.DataFrame(confusion_matrix, index=letters, columns=letters)
        mask = df_cm.values == 0  # hide empty cells (pairs without a score)

        sn.set(font_scale=0.7)  # label size
        colors = ["#e74c3c", "#f39c12", "#f4d03f", "#5dade2", "#58d68d", "#28b463"]
        width = tab1.slider("matrix width", 1, 20, 14)
        height = tab1.slider("matrix height", 1, 10, 4)
        fig, ax = plt.subplots(figsize=(width, height))
        sn.heatmap(df_cm, cmap=colors, annot=True, mask=mask, annot_kws={"size": 7}, vmin=0, vmax=1, ax=ax)
        tab1.pyplot(fig)

        tab2.markdown("""
#### Kappa interpretation legend

| Kappa score (k) | Agreement |
|---|---|
| k < 0 | Less than chance agreement |
| 0.01 < k < 0.20 | Slight agreement |
| 0.21 < k < 0.40 | Fair agreement |
| 0.41 < k < 0.60 | Moderate agreement |
| 0.61 < k < 0.80 | Substantial agreement |
| 0.81 < k < 0.99 | Almost perfect agreement |