{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# NuNER-v1.0: token embeddings and `ner` pipeline\n",
    "\n",
    "Loads the `numind/NuNER-v1.0` checkpoint, builds per-token embeddings for two sample sentences, and then runs the same checkpoint through the Hugging Face `ner` pipeline."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import transformers\n",
    "from transformers import pipeline\n",
    "\n",
    "# Checkpoint shared by every cell below.\n",
    "MODEL_NAME = 'numind/NuNER-v1.0'\n",
    "\n",
    "# Sample sentences used for both the embedding and the pipeline demo.\n",
    "text = [\n",
    "    \"NuMind is an AI company based in Paris and USA.\",\n",
    "    \"See other models from us on https://huggingface.co/numind\"\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Token embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = transformers.AutoModel.from_pretrained(\n",
    "    MODEL_NAME,\n",
    "    output_hidden_states=True\n",
    ")\n",
    "tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)\n",
    "\n",
    "# padding=True pads the shorter sentence so both fit in one batch tensor;\n",
    "# use encoded_input['attention_mask'] to tell real tokens from padding.\n",
    "encoded_input = tokenizer(\n",
    "    text,\n",
    "    return_tensors='pt',\n",
    "    padding=True,\n",
    "    truncation=True\n",
    ")\n",
    "\n",
    "# Inference only: skip autograd bookkeeping during the forward pass.\n",
    "with torch.no_grad():\n",
    "    output = model(**encoded_input)\n",
    "\n",
    "# for better quality: concatenate two hidden-state layers along dim 2\n",
    "emb = torch.cat(\n",
    "    (output.hidden_states[-1], output.hidden_states[-7]),\n",
    "    dim=2\n",
    ")\n",
    "\n",
    "# for better speed\n",
    "# emb = output.hidden_states[-1]\n",
    "\n",
    "emb.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Token-classification pipeline\n",
    "\n",
    "**Caveat:** when this checkpoint is loaded into the `ner` pipeline, `transformers` reports that `classifier.weight` and `classifier.bias` are newly initialized, i.e. the classification head is untrained. The `LABEL_*` predictions printed below are therefore not meaningful until the head has been fine-tuned on a labelled NER dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build a token-classification pipeline on top of the same checkpoint.\n",
    "ner = pipeline(\"ner\", model=MODEL_NAME)\n",
    "\n",
    "# Process the text and get NER predictions:\n",
    "# one list of per-token prediction dicts per input sentence.\n",
    "results = ner(text)\n",
    "\n",
    "# Placeholder mapping only: the classifier head is untrained, so these\n",
    "# names must be replaced with the labels of a fine-tuned head.\n",
    "label_map = {\n",
    "    \"LABEL_0\": \"ORG\",  # Organization\n",
    "    \"LABEL_1\": \"LOC\",  # Location\n",
    "    # You can add more labels and their mappings here\n",
    "}\n",
    "\n",
    "for result in results:\n",
    "    # Each `res` is a flat dict describing a single predicted token.\n",
    "    for res in result:\n",
    "        # Remove the leading 'Ġ' marker (if present); lstrip, not strip,\n",
    "        # so only the prefix is touched.\n",
    "        word = res['word'].lstrip('Ġ')\n",
    "        # Look up the entity type based on the predicted label\n",
    "        entity_type = label_map.get(res['entity'], \"UNKNOWN\")\n",
    "        print(f\"Word: {word}, Entity Type: {entity_type}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}