{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "36990086",
   "metadata": {},
   "source": [
    "## This is to set your working path. You can work on your default path but ideally it is always good to have a separate folder and virtual environment for each project."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "e5f26a4e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "os.chdir(r\"C:\\Users\\abhis\\Documents\\Llama 2\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "56865eff",
   "metadata": {},
   "source": [
    "## Ensure to have requirements.txt file in the above path and execute the below command. Please use Ctrl+Enter to execute each cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9cd46858",
   "metadata": {},
   "outputs": [],
   "source": [
    "pip install -r requirements.txt"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "176ce467",
   "metadata": {},
   "source": [
    "## Importing the necessary libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "cc99e10b",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain import HuggingFacePipeline \n",
    "from langchain import PromptTemplate, LLMChain\n",
    "from datetime import datetime\n",
    "from transformers import pipeline\n",
    "import os\n",
    "import torch\n",
    "import transformers\n",
    "from transformers import AutoTokenizer, AutoModelForCausalLM \n",
    "from transformers import LlamaForCausalLM, LlamaTokenizer\n",
    "from accelerate import init_empty_weights\n",
    "from accelerate import infer_auto_device_map, init_empty_weights"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "2b98fe56",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. If you see this, DO NOT PANIC! This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Code Execution Start18:00:09\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\abhis\\anaconda3\\lib\\site-packages\\transformers\\modeling_utils.py:2363: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "424c4a9efc994b80883a62e52ada6888",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "print(\"Code Execution Start\"+datetime.now().strftime(\"%H:%M:%S\"))\n",
    "\n",
    "model_path=\"./Static/Llama\"\n",
    "\n",
    "tokenizer = LlamaTokenizer.from_pretrained(model_path) \n",
    "model= LlamaForCausalLM.from_pretrained (model_path,device_map='auto',torch_dtype=torch.float32,\n",
    "                                         use_auth_token=True,offload_folder=\"save_folder\",local_files_only=True)\n",
    "model.tie_weights()\n",
    "\n",
    "if torch.backends.mps.is_available(): \n",
    "    mps_device = torch.device(\"mps\")\n",
    "\n",
    "#Please ensure these changes for GPU implementations\n",
    "os.environ[\"SAFETENSORS FAST_GPU\"]=\"1\"\n",
    "#torch_dtype=torch.bfloat16 is for GPU implementation only. For CPU, we have to make it 32\n",
    "pipe = pipeline(\"text-generation\",model=model,tokenizer=tokenizer,torch_dtype=torch.float32,device_map=\"auto\",\n",
    "                max_new_tokens = 40,do_sample=True,top_k=30,num_return_sequences=40,eos_token_id=tokenizer.eos_token_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "5f6222ca",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[INST]<<SYS>>\n",
      "You are an advanced assistant that excels at translation that answers query in one word. \n",
      "<</SYS>>\n",
      "\n",
      "Translate the following word from English to french. :\n",
      "\n",
      " {text}[/INST]\n",
      "Inferencing Started18:09:02\n",
      "OUTPUT>>>  Chien\n",
      "Inferencing Completed18:34:40\n"
     ]
    }
   ],
   "source": [
    "B_INST, E_INST = \"[INST]\", \"[/INST]\"\n",
    "B_SYS, E_SYS = \"<<SYS>>\\n\", \"\\n<</SYS>>\\n\\n\" \n",
    "DEFAULT_SYSTEM_PROMPT = \"\"\"\\\n",
    "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.\n",
    "If a question does not make any sense, or is not factually coherent, explain why instead of answering something\"\"\"\n",
    "\n",
    "def get_prompt(instruction, new_system_prompt=DEFAULT_SYSTEM_PROMPT):\n",
    "    SYSTEM_PROMPT = B_SYS + new_system_prompt + E_SYS\n",
    "    prompt_template = B_INST + SYSTEM_PROMPT + instruction + E_INST\n",
    "    return prompt_template\n",
    "\n",
    "\n",
    "llm = HuggingFacePipeline(pipeline = pipe, model_kwargs = {'temperature': 0})\n",
    "system_prompt = \"You are an advanced assistant that excels at translation that answers query in one word. \" \n",
    "instruction = \"Translate the following word from English to french. :\\n\\n {text}\" \n",
    "template = get_prompt(instruction, system_prompt)\n",
    "print(template)\n",
    "prompt = PromptTemplate(template=template, input_variables=[\"text\"])\n",
    "llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
    "text = \"Dog\"\n",
    "print(\"Inferencing Started \"+datetime.now().strftime(\"%H:%M:%S\")) \n",
    "output = llm_chain.run(text)\n",
    "print(\"OUTPUT>>>\"+ output)\n",
    "print(\"Inferencing Completed \" +datetime.now().strftime(\"%H:%M:%S\"))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}