# Pin this notebook to GPU 7; must be set before any CUDA-aware library initializes.
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "7"
# Imports — consolidated into one block; duplicates removed (`AutoTokenizer` and
# `numpy` were each imported twice in the original cell).
import random
import re
from collections import defaultdict
from random import Random

import numpy as np
import pandas as pd
import spacy
import torch

import datasets
from datasets import load_dataset, load_metric
from transformers import (
    AutoModelForTokenClassification,
    AutoTokenizer,
    DataCollatorForTokenClassification,
    Trainer,
    TrainingArguments,
    set_seed,
)

# !pip install seqeval

# ---------------- Configuration ----------------
# Training hyperparameters.
per_device_train_batch_size = 16
per_device_eval_batch_size = 32
num_train_epochs = 5
weight_decay = 0.1
warmup_ratio = 0.1          # fraction of total steps used for LR warmup
learning_rate = 5e-5
load_best_model_at_end = True

# Paths and dataset identifiers.
output_dir = "../akoksal/earthquake_ner_models/"
old_data_path = "annotated_address_dataset_07022023_766train_192test/"
data_path = "deprem-private/ner_v12"   # gated HF dataset; needs auth token
cache_dir = "../akoksal/hf_cache"
saved_models_path = "../akoksal/earthquake_ner_models/"
device = "cuda"
seed = 42

# Candidate Turkish / multilingual encoder checkpoints.
model_names = ["dbmdz/bert-base-turkish-cased",
               "dbmdz/electra-base-turkish-mc4-cased-discriminator",
               "dbmdz/bert-base-turkish-128k-cased",
               "dbmdz/convbert-base-turkish-cased",
               "bert-base-multilingual-cased",
               "xlm-roberta-base"]
model_name = model_names[2]  # dbmdz/bert-base-turkish-128k-cased
model_name  # display the selected checkpoint

set_seed(seed)  # seeds python/numpy/torch RNGs for reproducibility

# BIO tag scheme for the address/person NER task. Generated programmatically
# (O at id 0, then B-/I- pairs per entity type) so the ids stay contiguous and
# cannot drift out of sync with hand-typed numbers; the resulting mapping is
# identical to the previous hand-numbered 25-entry dict.
entity_types = [
    "bina", "bulvar", "cadde", "diskapino", "ilce", "isim",
    "mahalle", "sehir", "site", "sokak", "soyisim", "telefonno",
]

id2label = {0: "O"}
for i, entity in enumerate(entity_types):
    id2label[2 * i + 1] = f"B-{entity}"
    id2label[2 * i + 2] = f"I-{entity}"

label2id = {label: idx for idx, label in id2label.items()}
label_names = list(label2id.keys())

# from huggingface_hub import login
# login()
# Tokenizer + encoder with a fresh token-classification head (one logit per BIO tag).
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(
    model_name,
    num_labels=len(label_names),
    id2label=id2label,
    cache_dir=cache_dir,
).to(device)

# The raw dataset is CoNLL-style text: one "token TAG" pair per line, with
# sentences separated by blank lines. (Uses `data_path` from the config cell
# instead of re-hardcoding the dataset id.)
raw_dataset = datasets.load_dataset(data_path, use_auth_token=True)


def _parse_conll_lines(lines):
    """Parse 'token TAG' lines (blank-line separated) into parallel lists.

    Returns (sentences, labels, ids) where `labels` holds tag *ids* via
    `label2id`. Fixes vs. the original loop:
    - each line is split once instead of twice;
    - the duplicated `ids = []` initialization is gone;
    - a final sentence not followed by a blank line is no longer dropped.
    """
    sentences, labels, ids = [], [], []
    temp_sent, temp_labels = [], []
    for line in lines:
        if line != "":
            token, tag = line.split()[:2]
            temp_sent.append(token)
            temp_labels.append(label2id[tag])
        else:
            sentences.append(temp_sent)
            labels.append(temp_labels)
            ids.append(len(ids))
            temp_sent, temp_labels = [], []
    if temp_sent:  # flush a trailing sentence with no terminating blank line
        sentences.append(temp_sent)
        labels.append(temp_labels)
        ids.append(len(ids))
    return sentences, labels, ids


new_dataset_json = {}
for split in ["train", "validation", "test"]:
    sentences, labels, ids = _parse_conll_lines(raw_dataset[split]["text"])
    new_dataset_json[split] = {"tokens": sentences, "ner_tags": labels, "ids": ids}

dataset = datasets.DatasetDict(
    {split: datasets.Dataset.from_dict(cols) for split, cols in new_dataset_json.items()}
)


def tokenize_and_align_labels(examples):
    """Subword-tokenize pre-split words and align NER labels to subwords.

    Only the first subword of each word keeps the word's label; continuation
    subwords and special tokens get -100 so the loss ignores them.
    """
    tokenized_inputs = tokenizer(examples["tokens"], truncation=True, is_split_into_words=True)

    labels = []
    for i, label in enumerate(examples["ner_tags"]):
        word_ids = tokenized_inputs.word_ids(batch_index=i)  # subword -> word index
        previous_word_idx = None
        label_ids = []
        for word_idx in word_ids:
            if word_idx is None:                 # special token ([CLS]/[SEP]/pad)
                label_ids.append(-100)
            elif word_idx != previous_word_idx:  # first subword of a word
                label_ids.append(label[word_idx])
            else:                                # continuation subword
                label_ids.append(-100)
            previous_word_idx = word_idx
        labels.append(label_ids)

    tokenized_inputs["labels"] = labels
    return tokenized_inputs


tokenized_dataset = dataset.map(tokenize_and_align_labels, batched=True)

# Pads input ids *and* labels (with -100) to the longest sequence in each batch.
data_collator = DataCollatorForTokenClassification(tokenizer)

# NOTE(review): datasets.load_metric is deprecated (see FutureWarning in the
# original output) — consider migrating to `evaluate.load("seqeval")`.
metric = load_metric("seqeval")


def compute_metrics(p):
    """Compute seqeval metrics, flattened to scalars for the Trainer.

    `p` is a (logits, label_ids) pair. Positions labeled -100 (special tokens
    and continuation subwords) are excluded before scoring. Per-entity results
    are flattened to keys like "sehir_f1" / "sehir_support".
    """
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)

    # Drop ignored positions (label == -100) from both streams.
    true_predictions = [
        [label_names[pred] for (pred, lab) in zip(prediction, label) if lab != -100]
        for prediction, label in zip(predictions, labels)
    ]
    true_labels = [
        [label_names[lab] for (pred, lab) in zip(prediction, label) if lab != -100]
        for prediction, label in zip(predictions, labels)
    ]

    results = metric.compute(predictions=true_predictions, references=true_labels)
    flattened_results = {
        "overall_precision": results["overall_precision"],
        "overall_recall": results["overall_recall"],
        "overall_f1": results["overall_f1"],
        "overall_accuracy": results["overall_accuracy"],
    }
    for key, value in results.items():
        if key not in flattened_results:  # per-entity dicts from seqeval
            flattened_results[key + "_f1"] = value["f1"]
            flattened_results[key + "_recall"] = value["recall"]
            flattened_results[key + "_precision"] = value["precision"]
            flattened_results[key + "_support"] = value["number"]

    return flattened_results


training_args = TrainingArguments(
    output_dir=saved_models_path,
    evaluation_strategy="epoch",
    learning_rate=learning_rate,
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    num_train_epochs=num_train_epochs,
    warmup_ratio=warmup_ratio,
    weight_decay=weight_decay,
    run_name="turkish_ner",
    save_strategy="epoch",
    logging_strategy="epoch",
    save_total_limit=3,
    # NOTE(review): no metric_for_best_model is set, so "best" defaults to
    # lowest eval loss — confirm that is the intended selection criterion.
    load_best_model_at_end=load_best_model_at_end,
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)
Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", " warnings.warn(\n", "***** Running training *****\n", " Num examples = 799\n", " Num Epochs = 5\n", " Instantaneous batch size per device = 16\n", " Total train batch size (w. parallel, distributed & accumulation) = 16\n", " Gradient Accumulation steps = 1\n", " Total optimization steps = 250\n", " Number of trainable parameters = 183773977\n", "You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n" ] }, { "data": { "text/html": [ "\n", "
Epoch | \n", "Training Loss | \n", "Validation Loss | \n", "Overall Precision | \n", "Overall Recall | \n", "Overall F1 | \n", "Overall Accuracy | \n", "Bina F1 | \n", "Bina Recall | \n", "Bina Precision | \n", "Bina Support | \n", "Bulvar F1 | \n", "Bulvar Recall | \n", "Bulvar Precision | \n", "Bulvar Support | \n", "Cadde F1 | \n", "Cadde Recall | \n", "Cadde Precision | \n", "Cadde Support | \n", "Diskapino F1 | \n", "Diskapino Recall | \n", "Diskapino Precision | \n", "Diskapino Support | \n", "Ilce F1 | \n", "Ilce Recall | \n", "Ilce Precision | \n", "Ilce Support | \n", "Isim F1 | \n", "Isim Recall | \n", "Isim Precision | \n", "Isim Support | \n", "Mahalle F1 | \n", "Mahalle Recall | \n", "Mahalle Precision | \n", "Mahalle Support | \n", "Sehir F1 | \n", "Sehir Recall | \n", "Sehir Precision | \n", "Sehir Support | \n", "Site F1 | \n", "Site Recall | \n", "Site Precision | \n", "Site Support | \n", "Sokak F1 | \n", "Sokak Recall | \n", "Sokak Precision | \n", "Sokak Support | \n", "Soyisim F1 | \n", "Soyisim Recall | \n", "Soyisim Precision | \n", "Soyisim Support | \n", "Telefonno F1 | \n", "Telefonno Recall | \n", "Telefonno Precision | \n", "Telefonno Support | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | \n", "1.349500 | \n", "0.357321 | \n", "0.783270 | \n", "0.828974 | \n", "0.805474 | \n", "0.908936 | \n", "0.600000 | \n", "0.705882 | \n", "0.521739 | \n", "34 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "5 | \n", "0.588235 | \n", "0.833333 | \n", "0.454545 | \n", "24 | \n", "0.769231 | \n", "0.892857 | \n", "0.675676 | \n", "28 | \n", "0.830508 | \n", "0.816667 | \n", "0.844828 | \n", "60 | \n", "0.888889 | \n", "0.926829 | \n", "0.853933 | \n", "82 | \n", "0.750000 | \n", "0.792453 | \n", "0.711864 | \n", "53 | \n", "0.867133 | \n", "0.861111 | \n", "0.873239 | \n", "72 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "6 | \n", "0.750000 | \n", "0.620690 | \n", "0.947368 | \n", "29 | \n", "0.900000 | \n", "0.887324 | \n", "0.913043 | \n", "71 | \n", "0.985075 | \n", "1.000000 | \n", "0.970588 | \n", "33 | \n", "
2 | \n", "0.264700 | \n", "0.220467 | \n", "0.885149 | \n", "0.899396 | \n", "0.892216 | \n", "0.944792 | \n", "0.782609 | \n", "0.794118 | \n", "0.771429 | \n", "34 | \n", "0.666667 | \n", "0.800000 | \n", "0.571429 | \n", "5 | \n", "0.875000 | \n", "0.875000 | \n", "0.875000 | \n", "24 | \n", "0.862069 | \n", "0.892857 | \n", "0.833333 | \n", "28 | \n", "0.894309 | \n", "0.916667 | \n", "0.873016 | \n", "60 | \n", "0.884848 | \n", "0.890244 | \n", "0.879518 | \n", "82 | \n", "0.897196 | \n", "0.905660 | \n", "0.888889 | \n", "53 | \n", "0.915493 | \n", "0.902778 | \n", "0.928571 | \n", "72 | \n", "0.181818 | \n", "0.166667 | \n", "0.200000 | \n", "6 | \n", "0.949153 | \n", "0.965517 | \n", "0.933333 | \n", "29 | \n", "0.950355 | \n", "0.943662 | \n", "0.957143 | \n", "71 | \n", "0.985075 | \n", "1.000000 | \n", "0.970588 | \n", "33 | \n", "
3 | \n", "0.158700 | \n", "0.219565 | \n", "0.876768 | \n", "0.873239 | \n", "0.875000 | \n", "0.940808 | \n", "0.805556 | \n", "0.852941 | \n", "0.763158 | \n", "34 | \n", "0.666667 | \n", "1.000000 | \n", "0.500000 | \n", "5 | \n", "0.880000 | \n", "0.916667 | \n", "0.846154 | \n", "24 | \n", "0.827586 | \n", "0.857143 | \n", "0.800000 | \n", "28 | \n", "0.881356 | \n", "0.866667 | \n", "0.896552 | \n", "60 | \n", "0.822785 | \n", "0.792683 | \n", "0.855263 | \n", "82 | \n", "0.886792 | \n", "0.886792 | \n", "0.886792 | \n", "53 | \n", "0.892086 | \n", "0.861111 | \n", "0.925373 | \n", "72 | \n", "0.400000 | \n", "0.333333 | \n", "0.500000 | \n", "6 | \n", "0.881356 | \n", "0.896552 | \n", "0.866667 | \n", "29 | \n", "0.957143 | \n", "0.943662 | \n", "0.971014 | \n", "71 | \n", "0.985075 | \n", "1.000000 | \n", "0.970588 | \n", "33 | \n", "
4 | \n", "0.115000 | \n", "0.215329 | \n", "0.897541 | \n", "0.881288 | \n", "0.889340 | \n", "0.946500 | \n", "0.857143 | \n", "0.882353 | \n", "0.833333 | \n", "34 | \n", "0.909091 | \n", "1.000000 | \n", "0.833333 | \n", "5 | \n", "0.897959 | \n", "0.916667 | \n", "0.880000 | \n", "24 | \n", "0.862069 | \n", "0.892857 | \n", "0.833333 | \n", "28 | \n", "0.881356 | \n", "0.866667 | \n", "0.896552 | \n", "60 | \n", "0.810127 | \n", "0.780488 | \n", "0.842105 | \n", "82 | \n", "0.886792 | \n", "0.886792 | \n", "0.886792 | \n", "53 | \n", "0.890511 | \n", "0.847222 | \n", "0.938462 | \n", "72 | \n", "0.727273 | \n", "0.666667 | \n", "0.800000 | \n", "6 | \n", "0.950820 | \n", "1.000000 | \n", "0.906250 | \n", "29 | \n", "0.949640 | \n", "0.929577 | \n", "0.970588 | \n", "71 | \n", "0.985075 | \n", "1.000000 | \n", "0.970588 | \n", "33 | \n", "
5 | \n", "0.093800 | \n", "0.231558 | \n", "0.895492 | \n", "0.879276 | \n", "0.887310 | \n", "0.945361 | \n", "0.833333 | \n", "0.882353 | \n", "0.789474 | \n", "34 | \n", "0.909091 | \n", "1.000000 | \n", "0.833333 | \n", "5 | \n", "0.880000 | \n", "0.916667 | \n", "0.846154 | \n", "24 | \n", "0.813559 | \n", "0.857143 | \n", "0.774194 | \n", "28 | \n", "0.888889 | \n", "0.866667 | \n", "0.912281 | \n", "60 | \n", "0.833333 | \n", "0.792683 | \n", "0.878378 | \n", "82 | \n", "0.895238 | \n", "0.886792 | \n", "0.903846 | \n", "53 | \n", "0.898551 | \n", "0.861111 | \n", "0.939394 | \n", "72 | \n", "0.727273 | \n", "0.666667 | \n", "0.800000 | \n", "6 | \n", "0.881356 | \n", "0.896552 | \n", "0.866667 | \n", "29 | \n", "0.957143 | \n", "0.943662 | \n", "0.971014 | \n", "71 | \n", "0.985075 | \n", "1.000000 | \n", "0.970588 | \n", "33 | \n", "
"
],
"text/plain": [
"\n",
" \n",
"
\n",
"\n",
" \n",
" \n",
" \n",
" \n",
" support \n",
" precision \n",
" recall \n",
" f1 \n",
" accuracy \n",
" \n",
" \n",
" overall \n",
" 957 \n",
" 0.84 \n",
" 0.88 \n",
" 0.86 \n",
" 0.94 \n",
" \n",
" \n",
" bina \n",
" 66 \n",
" 0.66 \n",
" 0.74 \n",
" 0.70 \n",
" NaN \n",
" \n",
" \n",
" bulvar \n",
" 13 \n",
" 0.92 \n",
" 0.92 \n",
" 0.92 \n",
" NaN \n",
" \n",
" \n",
" cadde \n",
" 57 \n",
" 0.77 \n",
" 0.84 \n",
" 0.81 \n",
" NaN \n",
" \n",
" \n",
" diskapino \n",
" 70 \n",
" 0.69 \n",
" 0.73 \n",
" 0.71 \n",
" NaN \n",
" \n",
" \n",
" ilce \n",
" 117 \n",
" 0.89 \n",
" 0.96 \n",
" 0.92 \n",
" NaN \n",
" \n",
" \n",
" isim \n",
" 113 \n",
" 0.86 \n",
" 0.90 \n",
" 0.88 \n",
" NaN \n",
" \n",
" \n",
" mahalle \n",
" 120 \n",
" 0.77 \n",
" 0.82 \n",
" 0.79 \n",
" NaN \n",
" \n",
" \n",
" sehir \n",
" 146 \n",
" 0.98 \n",
" 0.97 \n",
" 0.97 \n",
" NaN \n",
" \n",
" \n",
" site \n",
" 18 \n",
" 0.79 \n",
" 0.61 \n",
" 0.69 \n",
" NaN \n",
" \n",
" \n",
" sokak \n",
" 62 \n",
" 0.72 \n",
" 0.74 \n",
" 0.73 \n",
" NaN \n",
" \n",
" \n",
" soyisim \n",
" 98 \n",
" 0.94 \n",
" 0.95 \n",
" 0.94 \n",
" NaN \n",
" \n",
" \n",
" \n",
"telefonno \n",
" 77 \n",
" 0.99 \n",
" 1.00 \n",
" 0.99 \n",
" NaN \n",
"