{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"gpu","dataSources":[],"dockerImageVersionId":30626,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"!pip install -q -U trl transformers accelerate git+https://github.com/huggingface/peft.git\n!pip install -q datasets bitsandbytes einops wandb","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19"}},{"cell_type":"code","source":"!pip install -q -U trl transformers accelerate git+https://github.com/huggingface/peft.git\n!pip install -q datasets bitsandbytes einops wandb","metadata":{"execution":{"iopub.status.busy":"2023-12-22T06:47:01.346659Z","iopub.execute_input":"2023-12-22T06:47:01.347244Z","iopub.status.idle":"2023-12-22T06:47:56.552101Z","shell.execute_reply.started":"2023-12-22T06:47:01.347203Z","shell.execute_reply":"2023-12-22T06:47:56.550786Z"},"trusted":true},"execution_count":1,"outputs":[]},{"cell_type":"code","source":"! pip install --upgrade datasets","metadata":{"execution":{"iopub.status.busy":"2023-12-22T06:47:56.554439Z","iopub.execute_input":"2023-12-22T06:47:56.555221Z","iopub.status.idle":"2023-12-22T06:48:10.456632Z","shell.execute_reply.started":"2023-12-22T06:47:56.555183Z","shell.execute_reply":"2023-12-22T06:48:10.455669Z"},"trusted":true},"execution_count":2,"outputs":[{"name":"stdout","text":"Requirement already satisfied: datasets in /opt/conda/lib/python3.10/site-packages (2.1.0)\nCollecting datasets\n Obtaining dependency information for datasets from https://files.pythonhosted.org/packages/e2/cf/db41e572d7ed958e8679018f8190438ef700aeb501b62da9e1eed9e4d69a/datasets-2.15.0-py3-none-any.whl.metadata\n Downloading datasets-2.15.0-py3-none-any.whl.metadata (20 kB)\nRequirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.10/site-packages (from datasets) (1.24.3)\nRequirement already satisfied: pyarrow>=8.0.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (11.0.0)\nCollecting pyarrow-hotfix (from datasets)\n Obtaining dependency information for pyarrow-hotfix from https://files.pythonhosted.org/packages/e4/f4/9ec2222f5f5f8ea04f66f184caafd991a39c8782e31f5b0266f101cb68ca/pyarrow_hotfix-0.6-py3-none-any.whl.metadata\n Downloading pyarrow_hotfix-0.6-py3-none-any.whl.metadata (3.6 kB)\nRequirement already satisfied: dill<0.3.8,>=0.3.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (0.3.7)\nRequirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (from datasets) (2.0.3)\nRequirement already satisfied: requests>=2.19.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (2.31.0)\nRequirement already satisfied: tqdm>=4.62.1 in /opt/conda/lib/python3.10/site-packages (from datasets) (4.66.1)\nRequirement already satisfied: xxhash in /opt/conda/lib/python3.10/site-packages (from datasets) (3.4.1)\nRequirement already satisfied: multiprocess in /opt/conda/lib/python3.10/site-packages (from datasets) (0.70.15)\nCollecting fsspec[http]<=2023.10.0,>=2023.1.0 (from datasets)\n Obtaining dependency information for fsspec[http]<=2023.10.0,>=2023.1.0 from 
https://files.pythonhosted.org/packages/e8/f6/3eccfb530aac90ad1301c582da228e4763f19e719ac8200752a4841b0b2d/fsspec-2023.10.0-py3-none-any.whl.metadata\n Downloading fsspec-2023.10.0-py3-none-any.whl.metadata (6.8 kB)\nRequirement already satisfied: aiohttp in /opt/conda/lib/python3.10/site-packages (from datasets) (3.8.5)\nRequirement already satisfied: huggingface-hub>=0.18.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (0.19.4)\nRequirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (from datasets) (21.3)\nRequirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from datasets) (6.0.1)\nRequirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (23.1.0)\nRequirement already satisfied: charset-normalizer<4.0,>=2.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (3.2.0)\nRequirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (6.0.4)\nRequirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (4.0.3)\nRequirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.9.2)\nRequirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.4.0)\nRequirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.3.1)\nRequirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.18.0->datasets) (3.12.2)\nRequirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.18.0->datasets) (4.5.0)\nRequirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.10/site-packages (from packaging->datasets) (3.0.9)\nRequirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (3.4)\nRequirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (1.26.15)\nRequirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (2023.11.17)\nRequirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2.8.2)\nRequirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2023.3)\nRequirement already satisfied: tzdata>=2022.1 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2023.3)\nRequirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\nDownloading datasets-2.15.0-py3-none-any.whl (521 kB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m521.2/521.2 kB\u001b[0m \u001b[31m10.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n\u001b[?25hDownloading pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)\nDownloading fsspec-2023.10.0-py3-none-any.whl (166 kB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m166.4/166.4 kB\u001b[0m \u001b[31m15.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hInstalling collected packages: pyarrow-hotfix, fsspec, datasets\n Attempting uninstall: fsspec\n 
Found existing installation: fsspec 2023.12.2\n Uninstalling fsspec-2023.12.2:\n Successfully uninstalled fsspec-2023.12.2\n Attempting uninstall: datasets\n Found existing installation: datasets 2.1.0\n Uninstalling datasets-2.1.0:\n Successfully uninstalled datasets-2.1.0\n\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\ncudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.\ncuml 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.\ndask-cudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.\ncudf 23.8.0 requires pandas<1.6.0dev0,>=1.3, but you have pandas 2.0.3 which is incompatible.\ncudf 23.8.0 requires protobuf<5,>=4.21, but you have protobuf 3.20.3 which is incompatible.\ncuml 23.8.0 requires dask==2023.7.1, but you have dask 2023.12.0 which is incompatible.\ncuml 23.8.0 requires distributed==2023.7.1, but you have distributed 2023.12.0 which is incompatible.\ndask-cuda 23.8.0 requires dask==2023.7.1, but you have dask 2023.12.0 which is incompatible.\ndask-cuda 23.8.0 requires distributed==2023.7.1, but you have distributed 2023.12.0 which is incompatible.\ndask-cuda 23.8.0 requires pandas<1.6.0dev0,>=1.3, but you have pandas 2.0.3 which is incompatible.\ndask-cudf 23.8.0 requires dask==2023.7.1, but you have dask 2023.12.0 which is incompatible.\ndask-cudf 23.8.0 requires distributed==2023.7.1, but you have distributed 2023.12.0 which is incompatible.\ndask-cudf 23.8.0 requires pandas<1.6.0dev0,>=1.3, but you have pandas 2.0.3 which is incompatible.\ngcsfs 2023.6.0 requires fsspec==2023.6.0, but you have fsspec 2023.10.0 which is incompatible.\nraft-dask 23.8.0 requires dask==2023.7.1, but you have dask 2023.12.0 which is incompatible.\nraft-dask 23.8.0 requires distributed==2023.7.1, but you have distributed 2023.12.0 which is incompatible.\ns3fs 2023.12.2 requires fsspec==2023.12.2, but you have fsspec 2023.10.0 which is incompatible.\u001b[0m\u001b[31m\n\u001b[0mSuccessfully installed datasets-2.15.0 fsspec-2023.10.0 pyarrow-hotfix-0.6\n","output_type":"stream"}]},{"cell_type":"code","source":"from datasets import load_dataset\n\ndataset_name = \"OpenAssistant/oasst1\"\ndataset_ori = load_dataset(dataset_name, split=\"train\")","metadata":{"execution":{"iopub.status.busy":"2023-12-22T06:48:10.457891Z","iopub.execute_input":"2023-12-22T06:48:10.458173Z","iopub.status.idle":"2023-12-22T06:48:29.799894Z","shell.execute_reply.started":"2023-12-22T06:48:10.458145Z","shell.execute_reply":"2023-12-22T06:48:29.799162Z"},"trusted":true},"execution_count":3,"outputs":[{"output_type":"display_data","data":{"text/plain":"Downloading readme: 0%| | 0.00/10.2k [00:00=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.24.3\n warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"Downloading data files: 0%| | 0/2 [00:00","text/html":"Tracking run with wandb version 0.16.1"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"","text/html":"Run data is saved locally in /kaggle/working/wandb/run-20231222_065400-6154ymb5"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"","text/html":"Syncing run wise-flower-5 to Weights & Biases (docs)
"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"","text/html":" View project at https://wandb.ai/sriramya-toleti/huggingface"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"","text/html":" View run at https://wandb.ai/sriramya-toleti/huggingface/runs/6154ymb5"},"metadata":{}},{"name":"stderr","text":"You're using a CodeGenTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"","text/html":"\n
\n \n \n [500/500 1:23:17, Epoch 0/1]\n
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
StepTraining Loss
101.870800
201.792100
301.740000
401.685400
501.802200
601.746600
701.738900
801.655400
901.721500
1001.799100
1101.729200
1201.652500
1301.764100
1401.717000
1501.784400
1601.754300
1701.680400
1801.644700
1901.759300
2001.768600
2101.734300
2201.636500
2301.666700
2401.691200
2501.764700
2601.702900
2701.608000
2801.680700
2901.633000
3001.885800
3101.692800
3201.706800
3301.643200
3401.739800
3501.677900
3601.675400
3701.720200
3801.616800
3901.649500
4001.859900
4101.743400
4201.699300
4301.623900
4401.644700
4501.762400
4601.602100
4701.607500
4801.666300
4901.646800
5001.766400

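{"cell_type":"markdown","source":"Training stopped at 500 optimizer steps (about 0.81 of an epoch), with the logged loss drifting down from ~1.87 to ~1.71. The next cell is a hedged sketch, not part of the original run: it saves the trained LoRA adapter and tokenizer to an explicit directory (the trainer also wrote checkpoints under /kaggle/working/results, which are reused below).","metadata":{}},{"cell_type":"code","source":"# Hedged sketch (not executed in the original run): persist the LoRA adapter\n# produced by the SFT run above. Assumes the `trainer` and `tokenizer` objects\n# created in the training cells; the output path is illustrative.\nadapter_dir = \"/kaggle/working/final_adapter\"  # hypothetical path\ntrainer.model.save_pretrained(adapter_dir)  # writes adapter_config.json + adapter weights\ntokenizer.save_pretrained(adapter_dir)      # keep tokenizer files next to the adapter","metadata":{},"execution_count":null,"outputs":[]},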
"},"metadata":{}},{"execution_count":12,"output_type":"execute_result","data":{"text/plain":"TrainOutput(global_step=500, training_loss=1.711106170654297, metrics={'train_runtime': 5222.3118, 'train_samples_per_second': 1.532, 'train_steps_per_second': 0.096, 'total_flos': 3.293667738832896e+16, 'train_loss': 1.711106170654297, 'epoch': 0.81})"},"metadata":{}}]},{"cell_type":"code","source":"from transformers import (\n AutoModelForCausalLM,\n AutoTokenizer,\n BitsAndBytesConfig,\n HfArgumentParser,\n TrainingArguments,\n pipeline,\n logging,\n)","metadata":{"execution":{"iopub.status.busy":"2023-12-22T08:18:01.578105Z","iopub.execute_input":"2023-12-22T08:18:01.578425Z","iopub.status.idle":"2023-12-22T08:18:03.646153Z","shell.execute_reply.started":"2023-12-22T08:18:01.578398Z","shell.execute_reply":"2023-12-22T08:18:03.645130Z"},"trusted":true},"execution_count":13,"outputs":[]},{"cell_type":"code","source":"# Run text generation pipeline with our next model\nprompt = \"What is a large language model?\"\npipe = pipeline(task=\"text-generation\", model=model, tokenizer=tokenizer, max_length=200)\nresult = pipe(f\"### Human: {prompt}\",max_new_tokens=200)\nprint(result[0]['generated_text'])","metadata":{"execution":{"iopub.status.busy":"2023-12-22T08:18:03.647403Z","iopub.execute_input":"2023-12-22T08:18:03.647686Z","iopub.status.idle":"2023-12-22T08:18:20.906045Z","shell.execute_reply.started":"2023-12-22T08:18:03.647660Z","shell.execute_reply":"2023-12-22T08:18:20.904220Z"},"trusted":true},"execution_count":14,"outputs":[{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1518: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use and modify the model generation configuration (see https://huggingface.co/docs/transformers/generation_strategies#default-text-generation-configuration )\n warnings.warn(\nBoth `max_new_tokens` (=200) and `max_length`(=200) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)\n","output_type":"stream"},{"name":"stdout","text":"### Human: What is a large language model?### Assistant: A large language model (LLM) is a type of artificial intelligence model that is trained on a massive amount of text data to learn the patterns and structures of natural language. These models are designed to be able to generate human-like text, and they are often used in applications such as language translation, text generation, and chatbots.\n\nLLMs are typically trained on large datasets of text, such as books, articles, and social media posts, and they use complex algorithms to learn the relationships between words and phrases. The more data that is available to train the model, the more accurate and useful it becomes.\n\nSome popular LLMs include GPT-3, which was trained on over 50 billion words of text, and BERT, which was trained on over 1.5 billion words of text. 
{"cell_type":"code","source":"# Run the same pipeline on a plain prompt, without the chat template\nprompt = \"Explain about deep learning?\"\npipe = pipeline(task=\"text-generation\", model=model, tokenizer=tokenizer)\nresult = pipe(prompt, max_new_tokens=200)\nprint(result[0]['generated_text'])","metadata":{"execution":{"iopub.status.busy":"2023-12-22T08:47:47.055413Z","iopub.execute_input":"2023-12-22T08:47:47.056076Z","iopub.status.idle":"2023-12-22T08:48:04.378807Z","shell.execute_reply.started":"2023-12-22T08:47:47.056040Z","shell.execute_reply":"2023-12-22T08:48:04.377853Z"},"trusted":true},"execution_count":15,"outputs":[{"name":"stdout","text":"Explain about deep learning?\n\nI am trying to understand deep learning. I have read about it, but I am still confused. Can you explain it to me in simple terms?\n\nThank you.\n\nReply:\n\nDeep learning is a type of artificial intelligence that uses neural networks to learn from data. Neural networks are made up of layers of interconnected nodes that process information. Deep learning algorithms are designed to learn from large amounts of data and can be used to solve complex problems.\n\nDeep learning is used in many applications, such as image recognition, natural language processing, and speech recognition. It is also used in self-driving cars and other autonomous systems.\n\nDeep learning is different from traditional machine learning because it uses more layers of nodes and can learn more complex patterns in the data. This allows it to make more accurate predictions and decisions.\n\nI hope this helps!\n\nBest regards,\n[Your Name]\n\nEmail 2:\n\nSubject: How does deep learning\n","output_type":"stream"}]},{"cell_type":"markdown","source":"## Inferencing by loading the adapter model","metadata":{}},{"cell_type":"code","source":"!ls /kaggle/working/results/checkpoint-500\ncheckpoint_path = \"/kaggle/working/results/checkpoint-500\"","metadata":{"execution":{"iopub.status.busy":"2023-12-22T08:51:47.743397Z","iopub.execute_input":"2023-12-22T08:51:47.743865Z","iopub.status.idle":"2023-12-22T08:51:48.740560Z","shell.execute_reply.started":"2023-12-22T08:51:47.743826Z","shell.execute_reply":"2023-12-22T08:51:48.739343Z"},"trusted":true},"execution_count":20,"outputs":[{"name":"stderr","text":"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n","output_type":"stream"},{"name":"stdout","text":"README.md\t\t optimizer.pt\t\t tokenizer_config.json\nadapter_config.json\t rng_state.pth\t trainer_state.json\nadapter_model.safetensors scheduler.pt\t\t training_args.bin\nadded_tokens.json\t special_tokens_map.json vocab.json\nmerges.txt\t\t tokenizer.json\n","output_type":"stream"}]},
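{"cell_type":"markdown","source":"As an alternative to attaching the adapter at load time (a hedged sketch, not part of the original run): the LoRA weights in checkpoint-500 can be merged into a full-precision copy of the base model, yielding a standalone model that needs neither PEFT nor the adapter files at inference time. `merge_and_unload()` cannot run on a 4-bit base, so the base is reloaded in fp16 here.","metadata":{}},{"cell_type":"code","source":"# Hedged sketch: fold the trained LoRA adapter into the base weights.\n# Assumes `model_name` and `checkpoint_path` defined in earlier cells;\n# the output directory is illustrative.\nimport torch\nfrom peft import PeftModel\nfrom transformers import AutoModelForCausalLM\n\nbase = AutoModelForCausalLM.from_pretrained(\n    model_name, torch_dtype=torch.float16, trust_remote_code=True\n)\nmerged = PeftModel.from_pretrained(base, checkpoint_path).merge_and_unload()\nmerged.save_pretrained(\"/kaggle/working/merged_model\")  # hypothetical path","metadata":{},"execution_count":null,"outputs":[]},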
{"cell_type":"code","source":"# Reload the base model in 4-bit NF4 and attach the trained LoRA adapter\nbnb_config = BitsAndBytesConfig(\n    load_in_4bit=True,\n    bnb_4bit_quant_type=\"nf4\",\n    bnb_4bit_compute_dtype=torch.float16,\n)\n\nmodel_inference = AutoModelForCausalLM.from_pretrained(\n    model_name,\n    quantization_config=bnb_config,\n    trust_remote_code=True\n)\nmodel_inference.config.use_cache = False\nmodel_inference.load_adapter(checkpoint_path)","metadata":{"execution":{"iopub.status.busy":"2023-12-22T08:52:14.962648Z","iopub.execute_input":"2023-12-22T08:52:14.963476Z","iopub.status.idle":"2023-12-22T08:52:20.815271Z","shell.execute_reply.started":"2023-12-22T08:52:14.963435Z","shell.execute_reply":"2023-12-22T08:52:20.814300Z"},"trusted":true},"execution_count":21,"outputs":[{"output_type":"display_data","data":{"text/plain":"Loading checkpoint shards: 0%| | 0/2 [00:00