{ "cells": [ { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "ename": "OSError", "evalue": "Can't load tokenizer for 'paulilioaica/MixtureOfPhi3'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'paulilioaica/MixtureOfPhi3' is the correct path to a directory containing all relevant files for a LlamaTokenizerFast tokenizer.", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[3], line 7\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoModelForCausalLM, AutoTokenizer\n\u001b[1;32m 5\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpaulilioaica/MixtureOfPhi3\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 7\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m \u001b[43mAutoTokenizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 8\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForCausalLM\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[1;32m 9\u001b[0m model, \n\u001b[1;32m 10\u001b[0m trust_remote_code\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, \n\u001b[1;32m 11\u001b[0m )\n\u001b[1;32m 13\u001b[0m prompt\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHow many continents are there?\u001b[39m\u001b[38;5;124m\"\u001b[39m\n", "File \u001b[0;32m/anaconda/envs/azureml_py38/lib/python3.8/site-packages/transformers/models/auto/tokenization_auto.py:880\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[1;32m 878\u001b[0m tokenizer_class_py, tokenizer_class_fast \u001b[38;5;241m=\u001b[39m TOKENIZER_MAPPING[\u001b[38;5;28mtype\u001b[39m(config)]\n\u001b[1;32m 879\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tokenizer_class_fast \u001b[38;5;129;01mand\u001b[39;00m (use_fast \u001b[38;5;129;01mor\u001b[39;00m tokenizer_class_py \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m--> 880\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtokenizer_class_fast\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 881\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 882\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tokenizer_class_py \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", "File \u001b[0;32m/anaconda/envs/azureml_py38/lib/python3.8/site-packages/transformers/tokenization_utils_base.py:2073\u001b[0m, in \u001b[0;36mPreTrainedTokenizerBase.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, trust_remote_code, *init_inputs, **kwargs)\u001b[0m\n\u001b[1;32m 2067\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\n\u001b[1;32m 2068\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCan\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt load following files from cache: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00munresolved_files\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m and cannot check if these \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2069\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfiles are necessary for the tokenizer to operate.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2070\u001b[0m )\n\u001b[1;32m 2072\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mall\u001b[39m(full_file_name \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01mfor\u001b[39;00m full_file_name \u001b[38;5;129;01min\u001b[39;00m resolved_vocab_files\u001b[38;5;241m.\u001b[39mvalues()):\n\u001b[0;32m-> 2073\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(\n\u001b[1;32m 2074\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCan\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt load tokenizer for \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpretrained_model_name_or_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m. If you were trying to load it from \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2075\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhttps://huggingface.co/models\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, make sure you don\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt have a local directory with the same name. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2076\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOtherwise, make sure \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpretrained_model_name_or_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m is the correct path to a directory \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2077\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcontaining all relevant files for a \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m tokenizer.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2078\u001b[0m )\n\u001b[1;32m 2080\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m file_id, file_path \u001b[38;5;129;01min\u001b[39;00m vocab_files\u001b[38;5;241m.\u001b[39mitems():\n\u001b[1;32m 2081\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m file_id \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m resolved_vocab_files:\n", "\u001b[0;31mOSError\u001b[0m: Can't load tokenizer for 'paulilioaica/MixtureOfPhi3'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'paulilioaica/MixtureOfPhi3' is the correct path to a directory containing all relevant files for a LlamaTokenizerFast tokenizer." ] } ], "source": [ "import torch\n", "from transformers import AutoModelForCausalLM, AutoTokenizer\n", "\n", "\n", "model = \"paulilioaica/MixtureOfPhi3\"\n", "\n", "tokenizer = AutoTokenizer.from_pretrained(model)\n", "model = AutoModelForCausalLM.from_pretrained(\n", " model, \n", " trust_remote_code=True, \n", ")\n", "\n", "prompt=\"How many continents are there?\"\n", "input = f\"<|system|>\\nYou are a helpful AI assistant.<|end|>\\n<|user|>{prompt}\\n<|assistant|>\"\n", "tokenized_input = tokenizer.encode(input, return_tensors=\"pt\")\n", "\n", "outputs = model.generate(tokenized_input, max_new_tokens=128, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)\n", "print(outputs[0][\"generated_text\"])" ] } ], "metadata": { "kernelspec": { "display_name": "azureml_py38", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 2 }