{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "108ad76c-9502-40d3-86b2-7309affc926e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import shutil\n",
    "import requests\n",
    "import sys\n",
    "from typing import Optional, List, Tuple\n",
    "import json\n",
    "from langchain_community.llms import HuggingFaceHub"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "27579e99-9637-4fe5-902c-05c4969ea3aa",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "21ac52ca52fa4910a0e06e3286813c57",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "adapter_config.json:   0%|          | 0.00/701 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "HuggingFaceH4/zephyr-7b-beta\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d5c705f48d0d40fbb1dd3d409b7f4d7f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "config.json:   0%|          | 0.00/638 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f209655a6cc743a99cdfa9e0fe40b9f3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4b5d3521692d4dee9a6989a37b97d7bf",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading shards:   0%|          | 0/8 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f66b9cd6a8b04bddb99bbc34fafd6e2e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model-00001-of-00008.safetensors:   0%|          | 0.00/1.89G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3469f59063564f2f87022dbb69bc893e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model-00002-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2b9a7fd2ce904339b08fe41c4f912237",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model-00003-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "04318a351ff3418685c823f9cf77f2cd",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model-00004-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "87d710cb385e4db2ba8ffaf670841d40",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model-00005-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "dcdf133396924c2b84c4a86e5badfabe",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model-00006-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1256b467e1a04d75a4bd969e081dff88",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model-00007-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "40e036179dcf43bbac1e2c783638e086",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model-00008-of-00008.safetensors:   0%|          | 0.00/816M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ff862b62cc0c4b3fa6c2d49e617d8dd2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "09733da86a4c429db42a5d99d34f7035",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a4951f876a47494aade4c74786c51b5b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer_config.json:   0%|          | 0.00/1.43k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "267a73bd9a494d43899a2ffc75f19953",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3536d4a8071041ec9137bc931366e77b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a66e862cfb374441b2caa7ac5b232435",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "added_tokens.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ca714bb5a9c743219223f2163715e396",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "special_tokens_map.json:   0%|          | 0.00/168 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a1b80a7393944f06a43db4ba4419c293",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "adapter_model.safetensors:   0%|          | 0.00/83.9M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.10/dist-packages/peft/tuners/lora/bnb.py:272: UserWarning: Merge lora module to 4-bit linear may get different generations due to rounding errors.\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "##Loading the Model to answer questions\n",
    "import torch\n",
    "from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig\n",
    "from peft import PeftModel, PeftConfig\n",
    "\n",
    "\n",
    "peft_model_id = \"Ubaidbhat/zephr_database_finetuned\"\n",
    "config = PeftConfig.from_pretrained(peft_model_id)\n",
    "print(config.base_model_name_or_path)\n",
    "bnb_config = BitsAndBytesConfig(\n",
    "    load_in_4bit = True,\n",
    "    bnb_4bit_use_double_quant=True,\n",
    "    bnb_4bit_quant_type=\"nf4\",\n",
    "    bnb_4bit_compute_dtype=torch.bfloat16\n",
    ")\n",
    "\n",
    "d_map = {\"\": torch.cuda.current_device()} if torch.cuda.is_available() else None\n",
    "\n",
    "model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, quantization_config=bnb_config, device_map=d_map)\n",
    "tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)\n",
    "model = PeftModel.from_pretrained(model, peft_model_id)\n",
    "model = model.merge_and_unload()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "be4a09d2-6cff-4937-a6fb-4f58e4f895ff",
   "metadata": {},
   "outputs": [],
   "source": [
    "##Creating base Model Chain\n",
    "from langchain.llms import HuggingFacePipeline\n",
    "from langchain.prompts import PromptTemplate\n",
    "from transformers import pipeline\n",
    "from langchain_core.output_parsers import StrOutputParser\n",
    "from langchain.chains import LLMChain\n",
    "\n",
    "text_generation_pipeline = pipeline(\n",
    "    model=model,\n",
    "    tokenizer=tokenizer,\n",
    "    task=\"text-generation\",\n",
    "    temperature=0.2,\n",
    "    do_sample=True,\n",
    "    repetition_penalty=1.1,\n",
    "    return_full_text=True,\n",
    "    max_new_tokens=400,\n",
    "    pad_token_id=tokenizer.eos_token_id,\n",
    ")\n",
    "\n",
    "llm = HuggingFacePipeline(pipeline=text_generation_pipeline)\n",
    "\n",
    "prompt_template = \"\"\"\n",
    "<|system|>\n",
    "Answer the question based on your knowledge.\n",
    "</s>\n",
    "<|user|>\n",
    "{question}\n",
    "</s>\n",
    "<|assistant|>\n",
    "\"\"\"\n",
    "\n",
    "prompt = PromptTemplate(\n",
    "    input_variables=[\"question\"],\n",
    "    template=prompt_template,\n",
    ")\n",
    "\n",
    "llm_chain = prompt | llm | StrOutputParser()\n",
    "\n",
    "def inference(question):\n",
    "    llmAnswer = llm_chain.invoke({\"question\": question})\n",
    "    llmAnswer = llmAnswer.rstrip()\n",
    "    return llmAnswer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "9e4c410a-5fdf-4b52-96e2-6745b874cb16",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Running on local URL:  http://127.0.0.1:7862\n",
      "Running on public URL: https://5eea2cc419906ab3e9.gradio.live\n",
      "\n",
      "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div><iframe src=\"https://5eea2cc419906ab3e9.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import gradio as gr\n",
    "from langchain_core.runnables import RunnablePassthrough\n",
    "\n",
    "def predict(question):\n",
    "    return inference(question)\n",
    "    \n",
    "pred = gr.Interface(\n",
    "    fn=predict,\n",
    "    inputs=[\n",
    "        gr.Textbox(label=\"Question\", value = \"Your Question here......\"),\n",
    "    ],\n",
    "    outputs=\"text\",\n",
    "    title=\"Finetuned Zephr Model in the Database Management Domaain\"\n",
    ")\n",
    "\n",
    "pred.launch(share=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c7be3ade-8cd6-448b-b608-0182a9743315",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}