{ "cells": [ { "cell_type": "code", "execution_count": 2, "id": "108ad76c-9502-40d3-86b2-7309affc926e", "metadata": {}, "outputs": [], "source": [ "import shutil\n", "import requests\n", "import sys\n", "from typing import Optional, List, Tuple\n", "import json\n", "from langchain_community.llms import HuggingFaceHub" ] }, { "cell_type": "code", "execution_count": 3, "id": "27579e99-9637-4fe5-902c-05c4969ea3aa", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "21ac52ca52fa4910a0e06e3286813c57", "version_major": 2, "version_minor": 0 }, "text/plain": [ "adapter_config.json: 0%| | 0.00/701 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "HuggingFaceH4/zephyr-7b-beta\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d5c705f48d0d40fbb1dd3d409b7f4d7f", "version_major": 2, "version_minor": 0 }, "text/plain": [ "config.json: 0%| | 0.00/638 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f209655a6cc743a99cdfa9e0fe40b9f3", "version_major": 2, "version_minor": 0 }, "text/plain": [ "model.safetensors.index.json: 0%| | 0.00/23.9k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "4b5d3521692d4dee9a6989a37b97d7bf", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading shards: 0%| | 0/8 [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f66b9cd6a8b04bddb99bbc34fafd6e2e", "version_major": 2, "version_minor": 0 }, "text/plain": [ "model-00001-of-00008.safetensors: 0%| | 0.00/1.89G [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "3469f59063564f2f87022dbb69bc893e", "version_major": 2, "version_minor": 0 }, "text/plain": [ "model-00002-of-00008.safetensors: 0%| | 0.00/1.95G [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2b9a7fd2ce904339b08fe41c4f912237", "version_major": 2, "version_minor": 0 }, "text/plain": [ "model-00003-of-00008.safetensors: 0%| | 0.00/1.98G [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "04318a351ff3418685c823f9cf77f2cd", "version_major": 2, "version_minor": 0 }, "text/plain": [ "model-00004-of-00008.safetensors: 0%| | 0.00/1.95G [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "87d710cb385e4db2ba8ffaf670841d40", "version_major": 2, "version_minor": 0 }, "text/plain": [ "model-00005-of-00008.safetensors: 0%| | 0.00/1.98G [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "dcdf133396924c2b84c4a86e5badfabe", "version_major": 2, "version_minor": 0 }, "text/plain": [ "model-00006-of-00008.safetensors: 0%| | 0.00/1.95G [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1256b467e1a04d75a4bd969e081dff88", "version_major": 2, "version_minor": 0 }, "text/plain": [ "model-00007-of-00008.safetensors: 0%| | 0.00/1.98G [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "40e036179dcf43bbac1e2c783638e086", "version_major": 2, "version_minor": 0 }, "text/plain": [ "model-00008-of-00008.safetensors: 0%| | 0.00/816M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ff862b62cc0c4b3fa6c2d49e617d8dd2", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Loading checkpoint shards: 0%| | 0/8 [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "09733da86a4c429db42a5d99d34f7035", "version_major": 2, "version_minor": 0 }, "text/plain": [ "generation_config.json: 0%| | 0.00/111 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a4951f876a47494aade4c74786c51b5b", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer_config.json: 0%| | 0.00/1.43k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "267a73bd9a494d43899a2ffc75f19953", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer.model: 0%| | 0.00/493k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "3536d4a8071041ec9137bc931366e77b", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer.json: 0%| | 0.00/1.80M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a66e862cfb374441b2caa7ac5b232435", "version_major": 2, "version_minor": 0 }, "text/plain": [ "added_tokens.json: 0%| | 0.00/42.0 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ca714bb5a9c743219223f2163715e396", "version_major": 2, "version_minor": 0 }, "text/plain": [ "special_tokens_map.json: 0%| | 0.00/168 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a1b80a7393944f06a43db4ba4419c293", "version_major": 2, "version_minor": 0 }, "text/plain": [ "adapter_model.safetensors: 0%| | 0.00/83.9M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.10/dist-packages/peft/tuners/lora/bnb.py:272: UserWarning: Merge lora module to 4-bit linear may get different generations due to rounding errors.\n", " warnings.warn(\n" ] } ], "source": [ "##Loading the Model to answer questions\n", "import torch\n", "from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig\n", "from peft import PeftModel, PeftConfig\n", "\n", "\n", "peft_model_id = \"Ubaidbhat/zephr_database_finetuned\"\n", "config = PeftConfig.from_pretrained(peft_model_id)\n", "print(config.base_model_name_or_path)\n", "bnb_config = BitsAndBytesConfig(\n", " load_in_4bit = True,\n", " bnb_4bit_use_double_quant=True,\n", " bnb_4bit_quant_type=\"nf4\",\n", " bnb_4bit_compute_dtype=torch.bfloat16\n", ")\n", "\n", "d_map = {\"\": torch.cuda.current_device()} if torch.cuda.is_available() else None\n", "\n", "model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, quantization_config=bnb_config, device_map=d_map)\n", "tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)\n", "model = PeftModel.from_pretrained(model, peft_model_id)\n", "model = model.merge_and_unload()" ] }, { "cell_type": "code", "execution_count": 4, "id": "be4a09d2-6cff-4937-a6fb-4f58e4f895ff", "metadata": {}, "outputs": [], "source": [ "##Creating base Model Chain\n", "from langchain.llms import HuggingFacePipeline\n", "from langchain.prompts import PromptTemplate\n", "from transformers import pipeline\n", "from langchain_core.output_parsers import StrOutputParser\n", "from langchain.chains import LLMChain\n", "\n", "text_generation_pipeline = pipeline(\n", " model=model,\n", " tokenizer=tokenizer,\n", " task=\"text-generation\",\n", " temperature=0.2,\n", " do_sample=True,\n", " repetition_penalty=1.1,\n", " return_full_text=True,\n", " max_new_tokens=400,\n", " pad_token_id=tokenizer.eos_token_id,\n", ")\n", "\n", "llm = HuggingFacePipeline(pipeline=text_generation_pipeline)\n", "\n", "prompt_template = \"\"\"\n", "<|system|>\n", "Answer the question based on your knowledge.\n", "\n", "<|user|>\n", "{question}\n", "\n", "<|assistant|>\n", "\"\"\"\n", "\n", "prompt = PromptTemplate(\n", " input_variables=[\"question\"],\n", " template=prompt_template,\n", ")\n", "\n", "llm_chain = prompt | llm | StrOutputParser()\n", "\n", "def inference(question):\n", " llmAnswer = llm_chain.invoke({\"question\": question})\n", " llmAnswer = llmAnswer.rstrip()\n", " return llmAnswer" ] }, { "cell_type": "code", "execution_count": 6, "id": "9e4c410a-5fdf-4b52-96e2-6745b874cb16", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Running on local URL: http://127.0.0.1:7862\n", "Running on public URL: https://5eea2cc419906ab3e9.gradio.live\n", "\n", "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "