{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# ISCO-08 hierarchical accuracy measure" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ISCO CSV file downloaded\n", "Weighted ISCO hierarchy dictionary created as isco_hierarchy\n", "\n", "The ISCO-08 Hierarchical Accuracy Measure is an implementation of the measure described in [Functional Annotation of Genes Using Hierarchical Text Categorization](https://www.researchgate.net/publication/44046343_Functional_Annotation_of_Genes_Using_Hierarchical_Text_Categorization) (Kiritchenko, Svetlana and Famili, Fazel. 2005) and adapted for the ISCO-08 classification scheme by the International Labour Organization.\n", "\n", "The measure rewards more precise classifications that correctly identify an occupation's placement down to the specific Unit group level and applies penalties for misclassifications based on the hierarchical distance between the correct and assigned categories.\n", "\n", "\n" ] } ], "source": [ "import os\n", "\n", "import evaluate\n", "\n", "# Path handed to evaluate.load. It defaults to the original local checkout;\n", "# set the ISCO_HAM_PATH environment variable to point somewhere else so the\n", "# notebook can run on a machine that does not have this home directory.\n", "DEFAULT_METRIC_PATH = \"/home/dux/workspace/1-IEA_RnD/isco_hierarchical_accuracy\"\n", "METRIC_PATH = os.environ.get(\"ISCO_HAM_PATH\", DEFAULT_METRIC_PATH)\n", "\n", "ham = evaluate.load(METRIC_PATH)\n", "print(ham.description)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "References: ['1111', '1112', '1113', '1114', '1120']\n", "Predictions: ['1111', '1113', '1120', '1211', '2111']\n", "Accuracy: 0.2, Hierarchical Precision: 0.5, Hierarchical Recall: 0.7777777777777778, Hierarchical F-measure: 0.6086956521739131\n", "{'accuracy': 0.2, 'hierarchical_precision': 0.5, 'hierarchical_recall': 0.7777777777777778, 'hierarchical_fmeasure': 0.6086956521739131}\n" ] } ], "source": [ "references = [\"1111\", \"1112\", \"1113\", \"1114\", \"1120\"]\n", "predictions = [\"1111\", \"1113\", \"1120\", \"1211\", \"2111\"]\n", "\n", "print(f\"References: {references}\")\n", "print(f\"Predictions: {predictions}\")\n", 
"print(ham.compute(references=references, predictions=predictions))" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "TEST CASE #1\n", "References: ['1111', '1111', '1111', '1111', '1111', '1111', '1111', '1111', '1111', '1111']\n", "Predictions: ['1111', '1112', '1120', '1211', '1311', '2111', '111', '11', '1', '9999']\n", "Accuracy: 0.1, Hierarchical Precision: 0.2222222222222222, Hierarchical Recall: 1.0, Hierarchical F-measure: 0.3636363636363636\n", "{'accuracy': 0.1, 'hierarchical_precision': 0.2222222222222222, 'hierarchical_recall': 1.0, 'hierarchical_fmeasure': 0.3636363636363636}\n", "\n", "TEST CASE #2\n", "References: ['1111']\n", "Predictions: ['1111']\n", "Accuracy: 1.0, Hierarchical Precision: 1.0, Hierarchical Recall: 1.0, Hierarchical F-measure: 1.0\n", "{'accuracy': 1.0, 'hierarchical_precision': 1.0, 'hierarchical_recall': 1.0, 'hierarchical_fmeasure': 1.0}\n", "\n", "TEST CASE #3\n", "References: ['1111']\n", "Predictions: ['1112']\n", "Accuracy: 0.0, Hierarchical Precision: 0.75, Hierarchical Recall: 0.75, Hierarchical F-measure: 0.75\n", "{'accuracy': 0.0, 'hierarchical_precision': 0.75, 'hierarchical_recall': 0.75, 'hierarchical_fmeasure': 0.75}\n", "\n", "TEST CASE #4\n", "References: ['1111']\n", "Predictions: ['1120']\n", "Accuracy: 0.0, Hierarchical Precision: 0.5, Hierarchical Recall: 0.5, Hierarchical F-measure: 0.5\n", "{'accuracy': 0.0, 'hierarchical_precision': 0.5, 'hierarchical_recall': 0.5, 'hierarchical_fmeasure': 0.5}\n", "\n", "TEST CASE #5\n", "References: ['1111']\n", "Predictions: ['1211']\n", "Accuracy: 0.0, Hierarchical Precision: 0.25, Hierarchical Recall: 0.25, Hierarchical F-measure: 0.25\n", "{'accuracy': 0.0, 'hierarchical_precision': 0.25, 'hierarchical_recall': 0.25, 'hierarchical_fmeasure': 0.25}\n", "\n", "TEST CASE #6\n", "References: ['1111']\n", "Predictions: ['1311']\n", "Accuracy: 0.0, Hierarchical Precision: 0.25, 
Hierarchical Recall: 0.25, Hierarchical F-measure: 0.25\n", "{'accuracy': 0.0, 'hierarchical_precision': 0.25, 'hierarchical_recall': 0.25, 'hierarchical_fmeasure': 0.25}\n", "\n", "TEST CASE #7\n", "References: ['1111']\n", "Predictions: ['2111']\n", "Accuracy: 0.0, Hierarchical Precision: 0.0, Hierarchical Recall: 0.0, Hierarchical F-measure: 0\n", "{'accuracy': 0.0, 'hierarchical_precision': 0.0, 'hierarchical_recall': 0.0, 'hierarchical_fmeasure': 0}\n", "\n", "TEST CASE #8\n", "References: ['1111']\n", "Predictions: ['111']\n", "Accuracy: 0.0, Hierarchical Precision: 1.0, Hierarchical Recall: 0.25, Hierarchical F-measure: 0.4\n", "{'accuracy': 0.0, 'hierarchical_precision': 1.0, 'hierarchical_recall': 0.25, 'hierarchical_fmeasure': 0.4}\n", "\n", "TEST CASE #9\n", "References: ['1111']\n", "Predictions: ['11']\n", "Accuracy: 0.0, Hierarchical Precision: 1.0, Hierarchical Recall: 0.25, Hierarchical F-measure: 0.4\n", "{'accuracy': 0.0, 'hierarchical_precision': 1.0, 'hierarchical_recall': 0.25, 'hierarchical_fmeasure': 0.4}\n", "\n", "TEST CASE #10\n", "References: ['1111']\n", "Predictions: ['1']\n", "Accuracy: 0.0, Hierarchical Precision: 1.0, Hierarchical Recall: 0.25, Hierarchical F-measure: 0.4\n", "{'accuracy': 0.0, 'hierarchical_precision': 1.0, 'hierarchical_recall': 0.25, 'hierarchical_fmeasure': 0.4}\n", "\n", "TEST CASE #11\n", "References: ['1111']\n", "Predictions: ['9999']\n", "Accuracy: 0.0, Hierarchical Precision: 0.0, Hierarchical Recall: 0.0, Hierarchical F-measure: 0\n", "{'accuracy': 0.0, 'hierarchical_precision': 0.0, 'hierarchical_recall': 0.0, 'hierarchical_fmeasure': 0}\n", "\n" ] } ], "source": [ "# Compute all test cases and print the results\n", "from tests import test_cases\n", "\n", "test_number = 1\n", "\n", "for test_case in test_cases:\n", " references = test_case[\"references\"]\n", " predictions = test_case[\"predictions\"]\n", " print(f\"TEST CASE #{test_number}\")\n", " print(f\"References: {references}\")\n", " 
print(f\"Predictions: {predictions}\")\n", " print(ham.compute(references=references, predictions=predictions))\n", " print()\n", " test_number += 1" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Model evaluation using the test split of the dataset" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from datasets import load_dataset, get_dataset_config_names, get_dataset_infos, get_dataset_split_names\n", "\n", "dataset = load_dataset(\"ICILS/multilingual_parental_occupations\", \"ilo\")\n", "dataset" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "4634a4a344384ef28d182adeea1f5afc", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading builder script: 0%| | 0.00/13.4k [00:00