#!/usr/bin/env python3
# -*- coding:utf-8 -*-
"""NER4ARCHIVES Analytics — Streamlit dashboard.

Displays statistics for an INCEpTION annotation project
(NER4Archives, Inria / Archives nationales) at two levels:

* *Global project statistics*: named entities over all curated documents;
* *Inter-Annotator Agreement results*: Fleiss/Cohen kappa for an IAA experiment.
"""
from collections import Counter, defaultdict
from itertools import combinations

import matplotlib.pyplot as pylt
import pandas as pd
import seaborn as sn
import streamlit as st

from n4a_analytics_lib.analytics import GlobalStatistics, IaaStatistics
from n4a_analytics_lib.metrics_utils import (
    cohen_kappa_function,
    fleiss_kappa_function,
    interpret_kappa,
)

TITLE = "NER4ARCHIVES Analytics"

# Shown in the "IAA Metrics Legend" tab (text kept identical to the original).
KAPPA_LEGEND = """
Kappa interpretation legend
Kappa score(k) Agreement
k < 0 Less chance agreement
0.01 < k < 0.20 Slight agreement
0.21 < k < 0.40 Fair agreement
0.41 < k < 0.60 Moderate agreement
0.61 < k < 0.80 Substantial agreement
0.81 < k < 0.99 Almost perfect agreement
"""


@st.cache
def convert_df(df_ex):
    """Encode a dataframe as UTF-8 CSV bytes (cached) for download buttons."""
    return df_ex.to_csv(encoding="utf-8").encode('utf-8')


def check_all_equal(iterator):
    """Return True when every value in *iterator* is identical (agreement)."""
    return len(set(iterator)) <= 1


def check_all_not_equal(iterator):
    """Return True when at least two values in *iterator* differ (disagreement)."""
    return len(set(iterator)) > 1


def count_total_annotations_label(dataframe, labels):
    """Return ``[(label, n_rows)]`` where *n_rows* counts mentions that at
    least one annotator tagged with *label*."""
    pairs = []
    for label in labels:
        total = dataframe.astype(object).eq(label).any(axis=1).sum()
        pairs.append((label, total))
    return pairs


def total_agree_disagree_per_label(dataframe, pairs_totals_labels):
    """Return ``[(label, total, agree_percent, disagree_percent)]`` per label.

    Fixes vs. original: the body now uses the *dataframe* parameter (it
    previously closed over the module-level ``df``), and labels with a zero
    row count yield 0.0 percentages instead of NaN from a 0/0 division.
    """
    # Rows where all annotators chose the same label (computed once).
    unanimous = dataframe[dataframe.nunique(axis=1).eq(1)]
    new_pairs = []
    for label, total in pairs_totals_labels:
        agree_res = unanimous.eq(label).any(axis=1).sum()
        disagree_res = total - agree_res
        if total:
            agree_percent = (agree_res / total) * 100
            disagree_percent = (disagree_res / total) * 100
        else:
            agree_percent = disagree_percent = 0.0
        new_pairs.append((label, total, agree_percent, disagree_percent))
    return new_pairs


def plot_pies(tasks_to_pie):
    """Draw one agree/disagree pie chart per label; return the figure."""
    my_labels = 'agree', 'disagree'
    my_colors = ['#47DBCD', '#F5B14C']
    my_explode = (0, 0.1)
    fig, axes = pylt.subplots(1, len(tasks_to_pie), figsize=(20, 3))
    if len(tasks_to_pie) == 1:
        # subplots() returns a bare Axes (not an array) for a single column;
        # the original indexed axes[0] and crashed in that case.
        axes = [axes]
    for ax, (label, _total, agree_pct, disagree_pct) in zip(axes, tasks_to_pie):
        ax.pie([agree_pct, disagree_pct], autopct='%1.1f%%', startangle=15,
               shadow=True, colors=my_colors, explode=my_explode)
        ax.set_title(label)
        ax.axis('equal')
    fig.set_facecolor("white")
    fig.legend(labels=my_labels, loc="center right", borderaxespad=0.1,
               title="Labels alignement")
    return fig


def clear_cache():
    """Invalidate the cached GlobalStatistics when a new project is uploaded."""
    st.session_state["p_a"] = None


# ---------------------------------------------------------------- page setup
st.set_page_config(layout="wide")
sidebar = st.sidebar          # meta, inputs etc.
col1, col2 = st.columns(2)    # results display

sidebar.markdown(f"""
# 📏 {TITLE}

A basic web application to display a dashboard for analyzing INCEpTION
annotation project built in context of NER4Archives (Inria/Archives
nationales).

- This tool provides two statistics levels:
    - *Global project statistics*: Analyze named entities in overall curated documents in project;
    - *Inter-Annotator Agreement results*: Analyze results of IAA experiment.
""")

option = sidebar.selectbox('Which statistics level?',
                           ('Inter-Annotator Agreement results',
                            'Global project statistics'))

# ------------------------------------------------------------ IAA results view
if option == "Inter-Annotator Agreement results":
    annotations = sidebar.file_uploader("Upload IAA annotations (.zip format only): ")
    baseline_text = sidebar.file_uploader("Upload baseline text (.txt format only): ")
    if baseline_text is not None and annotations is not None:
        project_analyzed = IaaStatistics(zip_project=annotations,
                                         baseline_text=baseline_text.getvalue())
        baseline_analyzer = project_analyzed.analyze_text()
        col2.markdown(f"""
### BASELINE TEXT: {baseline_text.name}

- sentences: {baseline_analyzer[0]}
- words: {baseline_analyzer[1]}
- characters: {baseline_analyzer[2]}
""")

        # Mentions seen by at least one coder, deduplicated, insertion order kept.
        commune_mentions = [m for mentions in project_analyzed.mentions_per_coder.values()
                            for m in mentions]
        commune_mentions = list(dict.fromkeys(commune_mentions))

        # Per-coder label frequencies; only used to derive the label inventory.
        # (Loop variable renamed: the original shadowed the uploader result
        # ``annotations``.)
        dicts_coders = []
        for coder, coder_annotations in project_analyzed.annotations_per_coders.items():
            dicts_coders.append(dict(Counter(coder_annotations.values())))
        labels = list(dicts_coders[0])

        # Rows = mentions, columns = annotators, cells = assigned label
        # ('None' when an annotator skipped the mention).
        df = pd.DataFrame(project_analyzed.annotations_per_coders,
                          index=commune_mentions)
        for ann in project_analyzed.annotators:
            df[ann] = 'None'
            for mention, value in project_analyzed.annotations_per_coders[ann].items():
                df.loc[mention, ann] = value

        total_annotations = len(df)
        # Per-row label counts feed the Fleiss kappa subject/category matrix.
        df_n = df.apply(pd.Series.value_counts, axis=1).fillna(0).astype(int)
        matrix = df_n.values
        pairs = list(combinations(project_analyzed.annotations_per_coders, 2))

        cont_kappa = st.container()
        cont_kappa.title("Inter-Annotator Agreement (IAA) results")
        tab1, tab2, tab3, tab4, tab5 = cont_kappa.tabs(
            ["📈 IAA metrics", "🗃 IAA Metrics Legend", "✔️ Agree annotations",
             "❌ Disagree annotations", "🏷️ Global Labels Statistics"])

        tab1.subheader("Fleiss Kappa (global score for group):")
        tab1.markdown(interpret_kappa(round(fleiss_kappa_function(matrix), 2)),
                      unsafe_allow_html=True)
        tab1.subheader("Cohen Kappa Annotators Matrix (score between annotators):")

        # Pairwise Cohen kappa for every annotator pair.
        data = []
        for coder_1, coder_2 in pairs:
            cohen_function = cohen_kappa_function(
                project_analyzed.labels_per_coder[coder_1],
                project_analyzed.labels_per_coder[coder_2])
            data.append(((coder_1, coder_2), cohen_function))
            tab1.markdown(f"* {coder_1} <> {coder_2} : {interpret_kappa(cohen_function)}",
                          unsafe_allow_html=True)

        # Fold the pair scores into a square annotator x annotator matrix
        # (zero where no score exists; those cells are masked in the heatmap).
        intermediary = defaultdict(Counter)
        for (src, tgt), score in data:
            intermediary[src][tgt] = score
        annotator_names = sorted(
            {key for inner in intermediary.values() for key in inner}
            | set(intermediary.keys()))
        score_matrix = [[intermediary[src][tgt] for tgt in annotator_names]
                        for src in annotator_names]
        df_cm = pd.DataFrame(score_matrix, annotator_names, annotator_names)
        mask = df_cm.values == 0
        sn.set(font_scale=0.7)  # label size
        colors = ["#e74c3c", "#f39c12", "#f4d03f", "#5dade2", "#58d68d", "#28b463"]
        # BUG FIX: the original default (14) lay outside the 1-10 slider range,
        # which Streamlit rejects at runtime; the range is widened to 1-20.
        width = tab1.slider("matrix width", 1, 20, 14)
        height = tab1.slider("matrix height", 1, 10, 4)
        fig, ax = pylt.subplots(figsize=(width, height))
        sn.heatmap(df_cm, cmap=colors, annot=True, mask=mask,
                   annot_kws={"size": 7}, vmin=0, vmax=1, ax=ax)
        tab1.pyplot(ax.figure)

        tab2.markdown(KAPPA_LEGEND, unsafe_allow_html=True)

        # ------------------------------------------------------- agree part
        columns_to_compare = project_analyzed.annotators
        df_agree = df[df[columns_to_compare].apply(check_all_equal, axis=1)]
        total_unanime = len(df_agree)
        csv_agree = convert_df(df_agree)
        tab3.subheader("Total agree annotations:")
        tab3.markdown(f"{total_unanime} / {len(df)} annotations "
                      f"({round((total_unanime / len(df)) * 100, 2)} %)")
        tab3.download_button(
            "Press to Download CSV",
            csv_agree,
            "csv_annotators_agree.csv",
            "text/csv",
            key='download-csv-1'
        )
        tab3.dataframe(df_agree)

        # ---------------------------------------------------- disagree part
        df_disagree = df[df[columns_to_compare].apply(check_all_not_equal, axis=1)]
        total_desaccord = len(df_disagree)
        csv_disagree = convert_df(df_disagree)
        tab4.subheader("Total disagree annotations:")
        tab4.markdown(f"{total_desaccord} / {len(df)} annotations "
                      f"({round((total_desaccord / len(df)) * 100, 2)} %)")
        tab4.download_button(
            "Press to Download CSV",
            csv_disagree,
            "csv_annotators_disagree.csv",
            "text/csv",
            key='download-csv-2'
        )
        tab4.dataframe(df_disagree)

        # --------------------------------------- label alignment pie charts
        totals_annotations_per_labels = count_total_annotations_label(df, labels)
        to_pie = total_agree_disagree_per_label(df, totals_annotations_per_labels)
        tab5.pyplot(plot_pies(to_pie).figure)

# -------------------------------------------------- global project results view
if option == "Global project statistics":
    project = sidebar.file_uploader(
        "Project folder that contains curated annotations in XMI 1.1 (.zip format only) : ",
        on_change=clear_cache)
    if project is not None:
        # BUG FIX: use .get() — "p_a" does not exist in session_state until
        # clear_cache() has fired at least once, so the original raised KeyError
        # on the first upload.
        if st.session_state.get("p_a") is None:
            st.session_state["p_a"] = GlobalStatistics(zip_project=project)
        if st.session_state["p_a"] is not None:
            with st.expander('Details on data'):
                col1.metric("Total curated annotations",
                            f"{st.session_state['p_a'].total_annotations_project} Named entities")
                col1.dataframe(st.session_state['p_a'].df_i)
                selected_data = col1.selectbox(
                    'Select specific data to display bar plot:',
                    st.session_state['p_a'].documents)
                col2.pyplot(st.session_state['p_a'].create_plot(selected_data))