{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "aa1a8952",
   "metadata": {},
   "outputs": [],
   "source": [
    "#import libraries\n",
    "from transformers import pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "5493cee5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9b94e18c676540d1ad8312149bdbcc7c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model.safetensors:   0%|          | 0.00/431M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "586b43b1b9e2484c9b8636f9b41ab5e2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer_config.json:   0%|          | 0.00/1.19k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d6c722044e074fd58b9cf1c67fa48551",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "95002c472f794e3b9b96dcfba697395e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer.json:   0%|          | 0.00/669k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b71e5e8241de454c9bd5f7066b3f5413",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "#reference appropriate Hugging Face model\n",
    "model_name = 'koakande/bert-finetuned-ner'\n",
    "\n",
    "# Load token classification pipeline modelfrom Hugging Face\n",
    "model = pipeline(\"token-classification\", model=model_name, aggregation_strategy=\"simple\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "f59488e3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'koakande/bert-finetuned-ner'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e6b97a0e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'entity_group': 'PER',\n",
       "  'score': 0.99741244,\n",
       "  'word': 'Kabeer',\n",
       "  'start': 12,\n",
       "  'end': 18},\n",
       " {'entity_group': 'ORG',\n",
       "  'score': 0.9985826,\n",
       "  'word': 'OVO',\n",
       "  'start': 61,\n",
       "  'end': 64},\n",
       " {'entity_group': 'LOC',\n",
       "  'score': 0.99884343,\n",
       "  'word': 'UK',\n",
       "  'start': 72,\n",
       "  'end': 74}]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "msg = \"Hello, I am Kabeer. I work as a machine learning engineer at OVO in the UK\"\n",
    "model(msg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "7c54c0ca",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"Hello, I am <span style='border: 2px solid green;'>Kabeer</span>. I work as a machine learning engineer at <span style='border: 2px solid green;'>OVO</span> in the <span style='border: 2px solid green;'>UK</span>\""
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# write a prediction method for the model\n",
    "def predict_entities(text):\n",
    "    # Use the loaded model to identify entities in the text\n",
    "    entities = model(text)\n",
    "    # Highlight identified entities in the input text\n",
    "    highlighted_text = text\n",
    "    for entity in entities:\n",
    "        entity_text = text[entity['start']:entity['end']]\n",
    "        replacement = f\"<span style='border: 2px solid green;'>{entity_text}</span>\"\n",
    "        highlighted_text = highlighted_text.replace(entity_text, replacement)\n",
    "    return highlighted_text\n",
    "\n",
    "predict_entities(msg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "4d784554",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Running on local URL:  http://127.0.0.1:7860\n",
      "\n",
      "To create a public link, set `share=True` in `launch()`.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# gradio interface\n",
    "import gradio as gr\n",
    "\n",
    "title = \"Named Entity Recognizer\"\n",
    "\n",
    "description = \"\"\"\n",
    "This model has been trained to identify entities in a given text. It returns the input text with the entities highlighted in green. Give it a try!\n",
    "\"\"\"\n",
    "\n",
    "article = \"The model is trained using bert-finetuned-ner.\"\n",
    "\n",
    "iface  = gr.Interface(\n",
    "    fn=predict_entities,\n",
    "    inputs=gr.Textbox(lines=5, placeholder=\"Enter text...\"),\n",
    "    outputs=gr.HTML(),\n",
    "    title=title,\n",
    "    description=description,\n",
    "    article=article,\n",
    "    examples=[[\"Hello, I am Kabeer. I work as a machine learning engineer at OVO in the UK\"], [\"This is Maryam who is a Leicester based NHS Doctor\"]],\n",
    ")\n",
    "\n",
    "iface.launch()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4f930b57",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "named_entity__kernel",
   "language": "python",
   "name": "named_entity__kernel"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}