{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "b7e0c1d2",
   "metadata": {},
   "source": [
    "# Named Entity Recognizer demo\n",
    "\n",
    "Loads the `koakande/bert-finetuned-ner` token-classification pipeline from Hugging Face, tries it on a sample sentence, and serves a small Gradio app that returns the input text with the detected entities highlighted."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "aa1a8952",
   "metadata": {},
   "outputs": [],
   "source": [
    "# import libraries\n",
    "import html\n",
    "\n",
    "import gradio as gr\n",
    "from transformers import pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "5493cee5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# reference appropriate Hugging Face model\n",
    "model_name = 'koakande/bert-finetuned-ner'\n",
    "\n",
    "# Load token classification pipeline model from Hugging Face\n",
    "model = pipeline(\"token-classification\", model=model_name, aggregation_strategy=\"simple\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "f59488e3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'koakande/bert-finetuned-ner'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e6b97a0e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'entity_group': 'PER',\n",
       "  'score': 0.99741244,\n",
       "  'word': 'Kabeer',\n",
       "  'start': 12,\n",
       "  'end': 18},\n",
       " {'entity_group': 'ORG',\n",
       "  'score': 0.9985826,\n",
       "  'word': 'OVO',\n",
       "  'start': 61,\n",
       "  'end': 64},\n",
       " {'entity_group': 'LOC',\n",
       "  'score': 0.99884343,\n",
       "  'word': 'UK',\n",
       "  'start': 72,\n",
       "  'end': 74}]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "msg = \"Hello, I am Kabeer. I work as a machine learning engineer at OVO in the UK\"\n",
    "model(msg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7c54c0ca",
   "metadata": {},
   "outputs": [],
   "source": [
    "# write a prediction method for the model\n",
    "# Inline CSS applied to every highlighted entity (the UI text promises green).\n",
    "HIGHLIGHT_STYLE = \"background-color: lightgreen; color: black;\"\n",
    "\n",
    "\n",
    "def predict_entities(text):\n",
    "    \"\"\"Return `text` as an HTML string with each detected entity highlighted in green.\n",
    "\n",
    "    Entities come from the module-level `model` pipeline. Each one is wrapped\n",
    "    at the character offsets the pipeline reports ('start' / 'end'), so only\n",
    "    the detected occurrence is marked, never other identical substrings.\n",
    "    Every piece of text is HTML-escaped because the result is rendered by\n",
    "    `gr.HTML`.\n",
    "\n",
    "    Args:\n",
    "        text: Raw input text. Empty or None input yields an empty string.\n",
    "\n",
    "    Returns:\n",
    "        The escaped text with one highlighted <span> per entity.\n",
    "    \"\"\"\n",
    "    if not text:\n",
    "        return \"\"\n",
    "    # Use the loaded model to identify entities in the text\n",
    "    entities = sorted(model(text), key=lambda entity: entity[\"start\"])\n",
    "    # Rebuild the text left to right, wrapping each entity span\n",
    "    pieces = []\n",
    "    cursor = 0\n",
    "    for entity in entities:\n",
    "        start, end = entity[\"start\"], entity[\"end\"]\n",
    "        if start < cursor:\n",
    "            continue  # overlaps a span that was already highlighted\n",
    "        pieces.append(html.escape(text[cursor:start]))\n",
    "        pieces.append(f'<span style=\"{HIGHLIGHT_STYLE}\">{html.escape(text[start:end])}</span>')\n",
    "        cursor = end\n",
    "    pieces.append(html.escape(text[cursor:]))\n",
    "    return \"\".join(pieces)\n",
    "\n",
    "\n",
    "predict_entities(msg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4d784554",
   "metadata": {},
   "outputs": [],
   "source": [
    "# gradio interface\n",
    "title = \"Named Entity Recognizer\"\n",
    "\n",
    "description = \"\"\"\n",
    "This model has been trained to identify entities in a given text. It returns the input text with the entities highlighted in green. Give it a try!\n",
    "\"\"\"\n",
    "\n",
    "article = \"The model is trained using bert-finetuned-ner.\"\n",
    "\n",
    "iface = gr.Interface(\n",
    "    fn=predict_entities,\n",
    "    inputs=gr.Textbox(lines=5, placeholder=\"Enter text...\"),\n",
    "    outputs=gr.HTML(),\n",
    "    title=title,\n",
    "    description=description,\n",
    "    article=article,\n",
    "    examples=[[\"Hello, I am Kabeer. I work as a machine learning engineer at OVO in the UK\"], [\"This is Maryam who is a Leicester based NHS Doctor\"]],\n",
    ")\n",
    "\n",
    "iface.launch()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4f930b57",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "named_entity__kernel",
   "language": "python",
   "name": "named_entity__kernel"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}