abhishekkankati9 committed 1ee19b7
Parent(s): 4671487

Upload 2 files
- Instructions Llama 2 7B.docx (+0 -0)
- Llama 2 Windows GPU setup.ipynb (+203 -0)
Instructions Llama 2 7B.docx
ADDED
Binary file (974 kB).
Llama 2 Windows GPU setup.ipynb
ADDED
@@ -0,0 +1,203 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "36990086",
   "metadata": {},
   "source": [
    "## Set your working path. You can work in your default path, but it is good practice to give each project its own folder and virtual environment."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "e5f26a4e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "os.chdir(r\"C:\\Users\\abhis\\Documents\\Llama 2\")"
   ]
  },
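  {
   "cell_type": "markdown",
   "id": "a0b1c2d3",
   "metadata": {},
   "source": [
    "## Optional sanity check: a minimal sketch that confirms the working directory and that the paths used later in this notebook (requirements.txt and the local model folder) actually exist."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b1c2d3e4",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Sketch only: the two paths below are the ones referenced by later cells.\n",
    "print(\"Working directory:\", os.getcwd())\n",
    "for expected in [\"requirements.txt\", \"./Static/Llama\"]:\n",
    "    print(expected, \"found\" if os.path.exists(expected) else \"MISSING\")"
   ]
  },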
  {
   "cell_type": "markdown",
   "id": "56865eff",
   "metadata": {},
   "source": [
    "## Ensure the requirements.txt file is in the above path, then execute the command below. Use Ctrl+Enter to run each cell."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9cd46858",
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install -r requirements.txt"
   ]
  },
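  {
   "cell_type": "markdown",
   "id": "c2d3e4f5",
   "metadata": {},
   "source": [
    "## Optional GPU check: a minimal sketch that verifies PyTorch can see the GPU before the model is loaded (this assumes requirements.txt installed a CUDA-enabled build of torch)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d3e4f5a6",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "\n",
    "# If CUDA is not available here, the pipeline below will run on CPU instead.\n",
    "print(\"torch version:\", torch.__version__)\n",
    "print(\"CUDA available:\", torch.cuda.is_available())\n",
    "if torch.cuda.is_available():\n",
    "    print(\"GPU:\", torch.cuda.get_device_name(0))"
   ]
  },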
  {
   "cell_type": "markdown",
   "id": "176ce467",
   "metadata": {},
   "source": [
    "## Import the necessary libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "cc99e10b",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain import HuggingFacePipeline\n",
    "from langchain import PromptTemplate, LLMChain\n",
    "from datetime import datetime\n",
    "from transformers import pipeline\n",
    "import os\n",
    "import torch\n",
    "import transformers\n",
    "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
    "from transformers import LlamaForCausalLM, LlamaTokenizer\n",
    "from accelerate import infer_auto_device_map, init_empty_weights"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "2b98fe56",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. If you see this, DO NOT PANIC! This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Code Execution Start18:00:09\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\abhis\\anaconda3\\lib\\site-packages\\transformers\\modeling_utils.py:2363: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "424c4a9efc994b80883a62e52ada6888",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "print(\"Code Execution Start\" + datetime.now().strftime(\"%H:%M:%S\"))\n",
    "\n",
    "model_path = \"./Static/Llama\"\n",
    "\n",
    "tokenizer = LlamaTokenizer.from_pretrained(model_path)\n",
    "model = LlamaForCausalLM.from_pretrained(model_path, device_map='auto', torch_dtype=torch.float32,\n",
    "                                         use_auth_token=True, offload_folder=\"save_folder\", local_files_only=True)\n",
    "model.tie_weights()\n",
    "\n",
    "if torch.backends.mps.is_available():\n",
    "    mps_device = torch.device(\"mps\")\n",
    "\n",
    "# Please ensure these changes for GPU implementations\n",
    "os.environ[\"SAFETENSORS_FAST_GPU\"] = \"1\"\n",
    "# torch_dtype=torch.bfloat16 is for GPU implementations only; for CPU, use torch.float32\n",
    "pipe = pipeline(\"text-generation\", model=model, tokenizer=tokenizer, torch_dtype=torch.float32, device_map=\"auto\",\n",
    "                max_new_tokens=40, do_sample=True, top_k=30, num_return_sequences=40, eos_token_id=tokenizer.eos_token_id)"
   ]
  },
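  {
   "cell_type": "markdown",
   "id": "e4f5a6b7",
   "metadata": {},
   "source": [
    "## Note: infer_auto_device_map and init_empty_weights are imported above but not used, since device_map='auto' computes the placement automatically. As a minimal sketch (the max_memory limits below are illustrative, not tuned), you could plan the GPU/CPU split explicitly like this."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f5a6b7c8",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import AutoConfig\n",
    "\n",
    "# Sketch: build a weightless (meta-device) copy of the model so the device map\n",
    "# can be planned without loading the checkpoint into memory.\n",
    "config = AutoConfig.from_pretrained(model_path)\n",
    "with init_empty_weights():\n",
    "    empty_model = LlamaForCausalLM(config)\n",
    "\n",
    "# Placeholder memory budgets; adjust to your actual GPU VRAM and system RAM.\n",
    "device_map = infer_auto_device_map(empty_model, max_memory={0: \"6GiB\", \"cpu\": \"30GiB\"})\n",
    "print(device_map)"
   ]
  },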
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "5f6222ca",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[INST]<<SYS>>\n",
      "You are an advanced assistant that excels at translation that answers query in one word. \n",
      "<</SYS>>\n",
      "\n",
      "Translate the following word from English to french. :\n",
      "\n",
      " {text}[/INST]\n",
      "Inferencing Started18:09:02\n",
      "OUTPUT>>> Chien\n",
      "Inferencing Completed18:34:40\n"
     ]
    }
   ],
   "source": [
    "B_INST, E_INST = \"[INST]\", \"[/INST]\"\n",
    "B_SYS, E_SYS = \"<<SYS>>\\n\", \"\\n<</SYS>>\\n\\n\"\n",
    "DEFAULT_SYSTEM_PROMPT = \"\"\"\\\n",
    "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.\n",
    "If a question does not make any sense, or is not factually coherent, explain why instead of answering something\"\"\"\n",
    "\n",
    "# Wrap an instruction in the Llama 2 chat format: [INST] <<SYS>> system <</SYS>> instruction [/INST]\n",
    "def get_prompt(instruction, new_system_prompt=DEFAULT_SYSTEM_PROMPT):\n",
    "    SYSTEM_PROMPT = B_SYS + new_system_prompt + E_SYS\n",
    "    prompt_template = B_INST + SYSTEM_PROMPT + instruction + E_INST\n",
    "    return prompt_template\n",
    "\n",
    "\n",
    "llm = HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature': 0})\n",
    "system_prompt = \"You are an advanced assistant that excels at translation that answers query in one word. \"\n",
    "instruction = \"Translate the following word from English to french. :\\n\\n {text}\"\n",
    "template = get_prompt(instruction, system_prompt)\n",
    "print(template)\n",
    "prompt = PromptTemplate(template=template, input_variables=[\"text\"])\n",
    "llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
    "text = \"Dog\"\n",
    "print(\"Inferencing Started \" + datetime.now().strftime(\"%H:%M:%S\"))\n",
    "output = llm_chain.run(text)\n",
    "print(\"OUTPUT>>>\" + output)\n",
    "print(\"Inferencing Completed \" + datetime.now().strftime(\"%H:%M:%S\"))"
   ]
  },
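  {
   "cell_type": "markdown",
   "id": "a6b7c8d9",
   "metadata": {},
   "source": [
    "## The chain can be reused for other inputs. A quick usage sketch (the word below is an arbitrary example, not from the original run):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b7c8d9e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sketch: any English word can be passed through the same template/chain.\n",
    "print(llm_chain.run(\"Cat\"))"
   ]
  }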
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}