File size: 50,465 Bytes
486ca0f
1
2
{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"gpu","dataSources":[],"dockerImageVersionId":30626,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"!pip install -q -U trl transformers accelerate git+https://github.com/huggingface/peft.git\n!pip install -q datasets bitsandbytes einops wandb","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19"}},{"cell_type":"code","source":"!pip install -q -U trl transformers accelerate git+https://github.com/huggingface/peft.git\n!pip install -q datasets bitsandbytes einops wandb","metadata":{"execution":{"iopub.status.busy":"2023-12-22T06:47:01.346659Z","iopub.execute_input":"2023-12-22T06:47:01.347244Z","iopub.status.idle":"2023-12-22T06:47:56.552101Z","shell.execute_reply.started":"2023-12-22T06:47:01.347203Z","shell.execute_reply":"2023-12-22T06:47:56.550786Z"},"trusted":true},"execution_count":1,"outputs":[]},{"cell_type":"code","source":"! pip install --upgrade datasets","metadata":{"execution":{"iopub.status.busy":"2023-12-22T06:47:56.554439Z","iopub.execute_input":"2023-12-22T06:47:56.555221Z","iopub.status.idle":"2023-12-22T06:48:10.456632Z","shell.execute_reply.started":"2023-12-22T06:47:56.555183Z","shell.execute_reply":"2023-12-22T06:48:10.455669Z"},"trusted":true},"execution_count":2,"outputs":[{"name":"stdout","text":"Requirement already satisfied: datasets in /opt/conda/lib/python3.10/site-packages (2.1.0)\nCollecting datasets\n  Obtaining dependency information for datasets from https://files.pythonhosted.org/packages/e2/cf/db41e572d7ed958e8679018f8190438ef700aeb501b62da9e1eed9e4d69a/datasets-2.15.0-py3-none-any.whl.metadata\n  Downloading datasets-2.15.0-py3-none-any.whl.metadata (20 kB)\nRequirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.10/site-packages (from datasets) (1.24.3)\nRequirement already satisfied: pyarrow>=8.0.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (11.0.0)\nCollecting pyarrow-hotfix (from datasets)\n  Obtaining dependency information for pyarrow-hotfix from https://files.pythonhosted.org/packages/e4/f4/9ec2222f5f5f8ea04f66f184caafd991a39c8782e31f5b0266f101cb68ca/pyarrow_hotfix-0.6-py3-none-any.whl.metadata\n  Downloading pyarrow_hotfix-0.6-py3-none-any.whl.metadata (3.6 kB)\nRequirement already satisfied: dill<0.3.8,>=0.3.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (0.3.7)\nRequirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (from datasets) (2.0.3)\nRequirement already satisfied: requests>=2.19.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (2.31.0)\nRequirement already satisfied: tqdm>=4.62.1 in /opt/conda/lib/python3.10/site-packages (from datasets) (4.66.1)\nRequirement already satisfied: xxhash in /opt/conda/lib/python3.10/site-packages (from datasets) (3.4.1)\nRequirement already satisfied: multiprocess in /opt/conda/lib/python3.10/site-packages (from datasets) (0.70.15)\nCollecting fsspec[http]<=2023.10.0,>=2023.1.0 (from datasets)\n  Obtaining dependency information for fsspec[http]<=2023.10.0,>=2023.1.0 from https://files.pythonhosted.org/packages/e8/f6/3eccfb530aac90ad1301c582da228e4763f19e719ac8200752a4841b0b2d/fsspec-2023.10.0-py3-none-any.whl.metadata\n  Downloading fsspec-2023.10.0-py3-none-any.whl.metadata (6.8 kB)\nRequirement already satisfied: aiohttp in /opt/conda/lib/python3.10/site-packages (from datasets) (3.8.5)\nRequirement already satisfied: huggingface-hub>=0.18.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (0.19.4)\nRequirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (from datasets) (21.3)\nRequirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from datasets) (6.0.1)\nRequirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (23.1.0)\nRequirement already satisfied: charset-normalizer<4.0,>=2.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (3.2.0)\nRequirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (6.0.4)\nRequirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (4.0.3)\nRequirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.9.2)\nRequirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.4.0)\nRequirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.3.1)\nRequirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.18.0->datasets) (3.12.2)\nRequirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.18.0->datasets) (4.5.0)\nRequirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.10/site-packages (from packaging->datasets) (3.0.9)\nRequirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (3.4)\nRequirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (1.26.15)\nRequirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (2023.11.17)\nRequirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2.8.2)\nRequirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2023.3)\nRequirement already satisfied: tzdata>=2022.1 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2023.3)\nRequirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\nDownloading datasets-2.15.0-py3-none-any.whl (521 kB)\n\u001b[2K   \u001b[90m鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣\u001b[0m \u001b[32m521.2/521.2 kB\u001b[0m \u001b[31m10.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n\u001b[?25hDownloading pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)\nDownloading fsspec-2023.10.0-py3-none-any.whl (166 kB)\n\u001b[2K   \u001b[90m鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣鈹佲攣\u001b[0m \u001b[32m166.4/166.4 kB\u001b[0m \u001b[31m15.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hInstalling collected packages: pyarrow-hotfix, fsspec, datasets\n  Attempting uninstall: fsspec\n    Found existing installation: fsspec 2023.12.2\n    Uninstalling fsspec-2023.12.2:\n      Successfully uninstalled fsspec-2023.12.2\n  Attempting uninstall: datasets\n    Found existing installation: datasets 2.1.0\n    Uninstalling datasets-2.1.0:\n      Successfully uninstalled datasets-2.1.0\n\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\ncudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.\ncuml 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.\ndask-cudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.\ncudf 23.8.0 requires pandas<1.6.0dev0,>=1.3, but you have pandas 2.0.3 which is incompatible.\ncudf 23.8.0 requires protobuf<5,>=4.21, but you have protobuf 3.20.3 which is incompatible.\ncuml 23.8.0 requires dask==2023.7.1, but you have dask 2023.12.0 which is incompatible.\ncuml 23.8.0 requires distributed==2023.7.1, but you have distributed 2023.12.0 which is incompatible.\ndask-cuda 23.8.0 requires dask==2023.7.1, but you have dask 2023.12.0 which is incompatible.\ndask-cuda 23.8.0 requires distributed==2023.7.1, but you have distributed 2023.12.0 which is incompatible.\ndask-cuda 23.8.0 requires pandas<1.6.0dev0,>=1.3, but you have pandas 2.0.3 which is incompatible.\ndask-cudf 23.8.0 requires dask==2023.7.1, but you have dask 2023.12.0 which is incompatible.\ndask-cudf 23.8.0 requires distributed==2023.7.1, but you have distributed 2023.12.0 which is incompatible.\ndask-cudf 23.8.0 requires pandas<1.6.0dev0,>=1.3, but you have pandas 2.0.3 which is incompatible.\ngcsfs 2023.6.0 requires fsspec==2023.6.0, but you have fsspec 2023.10.0 which is incompatible.\nraft-dask 23.8.0 requires dask==2023.7.1, but you have dask 2023.12.0 which is incompatible.\nraft-dask 23.8.0 requires distributed==2023.7.1, but you have distributed 2023.12.0 which is incompatible.\ns3fs 2023.12.2 requires fsspec==2023.12.2, but you have fsspec 2023.10.0 which is incompatible.\u001b[0m\u001b[31m\n\u001b[0mSuccessfully installed datasets-2.15.0 fsspec-2023.10.0 pyarrow-hotfix-0.6\n","output_type":"stream"}]},{"cell_type":"code","source":"from datasets import load_dataset\n\ndataset_name = \"OpenAssistant/oasst1\"\ndataset_ori = load_dataset(dataset_name, split=\"train\")","metadata":{"execution":{"iopub.status.busy":"2023-12-22T06:48:10.457891Z","iopub.execute_input":"2023-12-22T06:48:10.458173Z","iopub.status.idle":"2023-12-22T06:48:29.799894Z","shell.execute_reply.started":"2023-12-22T06:48:10.458145Z","shell.execute_reply":"2023-12-22T06:48:29.799162Z"},"trusted":true},"execution_count":3,"outputs":[{"output_type":"display_data","data":{"text/plain":"Downloading readme:   0%|          | 0.00/10.2k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"f2aa409a0ed544c79ec107f75d391402"}},"metadata":{}},{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.24.3\n  warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"15ec34073ef844bc8b1e64e2948e003d"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Downloading data:   0%|          | 0.00/39.5M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"d9171075b547427c91c8f290ead99de4"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Downloading data:   0%|          | 0.00/2.08M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"4b8f62685f104cd79032a444b5ad7f31"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"0e5361001ff446628771eefd1fb1c8c9"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Generating train split:   0%|          | 0/84437 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"d1331ca5dac44936a4322dc338381ae1"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Generating validation split:   0%|          | 0/4401 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"be89cf05496b4b15a75f564b789528e9"}},"metadata":{}}]},{"cell_type":"code","source":"from datasets import Dataset\n\n# Grouping the texts based on parent id\ngrouped_texts = {}\ncount = 0\nfor item in dataset_ori:\n    parent_id = item['parent_id']\n    text = item['text']\n    role = item['role']\n    \n    if parent_id is None:\n        count = count + 1\n        grouped_texts[count] = ''\n       \n    \n    if role == 'prompter':\n        grouped_texts[count] += f'### Human: {text}'\n    elif role == 'assistant':\n        grouped_texts[count] += f'### Assistant: {text}'\n\n# Creating a new dataset with the grouped texts\ntexts = [v for v in grouped_texts.values()]\n\n# Creating a new dataset with only the 'text' field\ndataset = Dataset.from_dict({'text': texts})\n\n# Print the first element of the new dataset\nprint(dataset[0])","metadata":{"execution":{"iopub.status.busy":"2023-12-22T06:48:29.802412Z","iopub.execute_input":"2023-12-22T06:48:29.803161Z","iopub.status.idle":"2023-12-22T06:48:56.269879Z","shell.execute_reply.started":"2023-12-22T06:48:29.803125Z","shell.execute_reply":"2023-12-22T06:48:56.268994Z"},"trusted":true},"execution_count":4,"outputs":[{"name":"stdout","text":"{'text': '### Human: Can you write a short introduction about the relevance of the term \"monopsony\" in economics? Please use examples related to potential monopsonies in the labour market and cite relevant research.### Assistant: \"Monopsony\" refers to a market structure where there is only one buyer for a particular good or service. In economics, this term is particularly relevant in the labor market, where a monopsony employer has significant power over the wages and working conditions of their employees. The presence of a monopsony can result in lower wages and reduced employment opportunities for workers, as the employer has little incentive to increase wages or provide better working conditions.\\n\\nRecent research has identified potential monopsonies in industries such as retail and fast food, where a few large companies control a significant portion of the market (Bivens & Mishel, 2013). In these industries, workers often face low wages, limited benefits, and reduced bargaining power, leading to a situation where they are dependent on the employer for their livelihood. This dependence can result in further suppression of wages and a decline in working conditions.\\n\\nOverall, the concept of monopsony is essential to understanding the dynamics of labor markets and the impact of market power on workers. Further research is needed to understand the extent and impact of monopsonies on the economy and to develop policies to address this issue.\\n\\nReferences:\\nBivens, J., & Mishel, L. (2013). The Pay of Corporate Executives and Financial Professionals as Evidence of Rents in Top 1 Percent Incomes. Journal of Economic Perspectives, 27(3), 57-78.### Human: Now explain it to a dog### Assistant: Monopsony is a market structure in which there is a single buyer in a market. In the context of labor markets, a monopsony occurs when there is only one employer in a particular industry or geographic area, giving that employer significant power over the wages and working conditions of workers. This results in a situation where the employer has the ability to suppress wages and reduce the bargaining power of workers.\\n\\nResearch has shown that the existence of monopsony power in labor markets can have negative effects on workers, such as lower wages, reduced benefits, and limited job opportunities. For example, a study by Autor, Dorn, and Hanson (2013) found evidence of monopsony power in certain U.S. labor markets, resulting in lower wages and reduced job opportunities for workers. Similarly, another study by Bharadwaj, Black, and Kolesnikova (2019) found that the presence of a single dominant employer in a local labor market was associated with lower wages for workers.### Human: How can one fight back when a monospony had been created?### Assistant: Monopsony refers to a market structure where there is only one buyer of a good or service. In the context of labor markets, a monopsony occurs when there is only one employer in a particular geographic area or industry, giving them significant power to set wages and working conditions. Research has shown that the existence of monopsonies can lead to lower wages and reduced job opportunities for workers. For example, a study by the National Bureau of Economic Research found that in industries with high levels of concentration, workers earn lower wages and are less likely to receive benefits such as health insurance.### Human: What can be done at a regulatory level to ensure, in a monopsony, that the power over the employees isn\\'t abused? List a few options and focus on the organs that should act.### Assistant: Here are some potential regulatory options to address monopsony power over employees:\\n\\n- Antitrust enforcement agencies like the FTC or DOJ could investigate and take action against dominant employers that are abusing their market power to suppress wages or worsen working conditions. They could force divestitures or other structural remedies.\\n- Labor standards agencies could set industry-specific minimum wage floors or other regulations around compensation and working conditions. This would reduce the ability of dominant employers to push wages/conditions below certain thresholds.\\n- Sector-specific regulators could impose requirements around how companies hire, compensate, and treat workers. For example, banking regulators could prohibit abusive practices by dominant banks with regard to their employees.\\n- Legislatures could pass laws### Assistant: Regulatory intervention can be used to address the negative consequences of monopsony power in the labor market. A few options include:\\n\\n1. Antitrust enforcement: Antitrust laws can be used to prevent firms from engaging in practices that limit competition and suppress wages. The antitrust division of the Department of Justice and the Federal Trade Commission (FTC) are responsible for enforcing these laws.\\n\\n2. Labor standards and protections: Governments can set minimum wage standards and other labor protections to ensure that workers are paid a fair wage, even in industries dominated by a single buyer. The U.S. Department of Labor is responsible for enforcing these standards.\\n\\n3. Occupational licensing reform: Occupational licensing requirements can restrict worker mobility and increase the bargaining power of employers. Reforms to licensing requirements can reduce these barriers and increase competition in the labor market. State governments typically regulate occupational licensing.\\n\\n4. Collective bargaining rights: Collective bargaining by workers can increase their bargaining power and lead to higher wages. Governments can protect and promote collective bargaining rights to counteract the effects of monopsony power. The National Labor Relations Board is responsible for enforcing workers\\' rights to engage in collective bargaining in the U.S.\\n\\nThese are just a few examples of regulatory options that can be used to address monopsony power in the labor market. The specific actions taken will depend on the particular circumstances and context of each case.### Assistant: Yes, that\\'s correct. Keeping the code for the TESR and the registration code in the same file can make it easier to manage and maintain the code. The ExampleMod class acts as a container for the TESR code, providing a convenient and logical grouping. The functionality of the code remains unchanged, and the TESR will still be properly registered and used to render the custom tile entities in the game.### Human: I would imagine this is similar or even the same as a \"monopoly\" but I am not sure.\\n\\nAssuming that this some kind of \"final state\" of a particular industry or even an entire economy, give me some larger historical examples of this kind of structure consolidating over time, and describe what circumstances are common prerequisite.### Assistant: Bouguereau died in 1905, so it is unlikely that any of his direct descendants are still alive. However, his works continue to be popular and are sold at auction and through art dealers. It is possible that some indirect descendants may benefit from the sale of his works.'}\n","output_type":"stream"}]},{"cell_type":"code","source":"import torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoTokenizer\n\nmodel_name = \"microsoft/phi-2\"\n\nbnb_config = BitsAndBytesConfig(\n    load_in_4bit=True,\n    bnb_4bit_quant_type=\"nf4\",\n    bnb_4bit_compute_dtype=torch.float16,\n)\n\nmodel = AutoModelForCausalLM.from_pretrained(\n    model_name,\n    quantization_config=bnb_config,\n    trust_remote_code=True\n)\nmodel.config.use_cache = False","metadata":{"execution":{"iopub.status.busy":"2023-12-22T06:48:56.271118Z","iopub.execute_input":"2023-12-22T06:48:56.271499Z","iopub.status.idle":"2023-12-22T06:50:19.230277Z","shell.execute_reply.started":"2023-12-22T06:48:56.271461Z","shell.execute_reply":"2023-12-22T06:50:19.229489Z"},"trusted":true},"execution_count":5,"outputs":[{"output_type":"display_data","data":{"text/plain":"config.json:   0%|          | 0.00/755 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"1a92da63bd8946afb7a1095bfded576c"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"configuration_phi.py:   0%|          | 0.00/2.03k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"ef0934eb7e174c6e8fb5c0a2ba090123"}},"metadata":{}},{"name":"stderr","text":"A new version of the following files was downloaded from https://huggingface.co/microsoft/phi-2:\n- configuration_phi.py\n. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"modeling_phi.py:   0%|          | 0.00/33.4k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"5bd205d52d9a423b831d30cb122e3f34"}},"metadata":{}},{"name":"stderr","text":"A new version of the following files was downloaded from https://huggingface.co/microsoft/phi-2:\n- modeling_phi.py\n. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"model.safetensors.index.json:   0%|          | 0.00/24.3k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"d8b20abb49dc4973b6aa846a7887a0ee"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"14645edfabb04a42b7a18d8144dcd2dc"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"model-00001-of-00002.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"6c06631e61fb4b2cb27e933aefe6a435"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"model-00002-of-00002.safetensors:   0%|          | 0.00/577M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"b3a743f73f2b4e24a9fa471415d03c7c"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"d16d94264d6a486d885efb317eafd2f6"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"generation_config.json:   0%|          | 0.00/69.0 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"09e1253cd8b94e2bb6ea4a92bf279fe3"}},"metadata":{}}]},{"cell_type":"code","source":"print(model)","metadata":{"execution":{"iopub.status.busy":"2023-12-22T06:50:19.231421Z","iopub.execute_input":"2023-12-22T06:50:19.231875Z","iopub.status.idle":"2023-12-22T06:50:19.239270Z","shell.execute_reply.started":"2023-12-22T06:50:19.231849Z","shell.execute_reply":"2023-12-22T06:50:19.238444Z"},"trusted":true},"execution_count":6,"outputs":[{"name":"stdout","text":"PhiForCausalLM(\n  (transformer): PhiModel(\n    (embd): Embedding(\n      (wte): Embedding(51200, 2560)\n      (drop): Dropout(p=0.0, inplace=False)\n    )\n    (h): ModuleList(\n      (0-31): 32 x ParallelBlock(\n        (ln): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)\n        (resid_dropout): Dropout(p=0.1, inplace=False)\n        (mixer): MHA(\n          (rotary_emb): RotaryEmbedding()\n          (Wqkv): Linear4bit(in_features=2560, out_features=7680, bias=True)\n          (out_proj): Linear4bit(in_features=2560, out_features=2560, bias=True)\n          (inner_attn): SelfAttention(\n            (drop): Dropout(p=0.0, inplace=False)\n          )\n          (inner_cross_attn): CrossAttention(\n            (drop): Dropout(p=0.0, inplace=False)\n          )\n        )\n        (mlp): MLP(\n          (fc1): Linear4bit(in_features=2560, out_features=10240, bias=True)\n          (fc2): Linear4bit(in_features=10240, out_features=2560, bias=True)\n          (act): NewGELUActivation()\n        )\n      )\n    )\n  )\n  (lm_head): CausalLMHead(\n    (ln): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)\n    (linear): Linear(in_features=2560, out_features=51200, bias=True)\n  )\n  (loss): CausalLMLoss(\n    (loss_fct): CrossEntropyLoss()\n  )\n)\n","output_type":"stream"}]},{"cell_type":"code","source":"tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\ntokenizer.pad_token = tokenizer.eos_token","metadata":{"execution":{"iopub.status.busy":"2023-12-22T06:50:19.240439Z","iopub.execute_input":"2023-12-22T06:50:19.240766Z","iopub.status.idle":"2023-12-22T06:50:24.724857Z","shell.execute_reply.started":"2023-12-22T06:50:19.240733Z","shell.execute_reply":"2023-12-22T06:50:24.723884Z"},"trusted":true},"execution_count":7,"outputs":[{"output_type":"display_data","data":{"text/plain":"tokenizer_config.json:   0%|          | 0.00/7.34k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"f41b3ed24d744775baa2168dd0145edd"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"dbde11dd7e8c431a8eea2e657bb79043"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"e0776a62640047d79e309c6e34faad98"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"a4f434a2afdb4f789cc7cbe94775ff54"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"added_tokens.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"c10817db534441e6accbc6d6abb67118"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"f7bcd793bf9640819edf1af5f5ed99b9"}},"metadata":{}},{"name":"stderr","text":"Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n","output_type":"stream"}]},{"cell_type":"code","source":"from peft import LoraConfig\n\nlora_alpha = 16\nlora_dropout = 0.1\nlora_r = 64\n\npeft_config = LoraConfig(\n    lora_alpha=lora_alpha,\n    lora_dropout=lora_dropout,\n    r=lora_r,\n    bias=\"none\",\n    task_type=\"CAUSAL_LM\",\n    target_modules=[\n        \"Wqkv\",\n        \"out_proj\",\n        \"fc1\",\n        \"fc2\",\n    ]\n)","metadata":{"execution":{"iopub.status.busy":"2023-12-22T06:50:24.726025Z","iopub.execute_input":"2023-12-22T06:50:24.726337Z","iopub.status.idle":"2023-12-22T06:50:24.768026Z","shell.execute_reply.started":"2023-12-22T06:50:24.726309Z","shell.execute_reply":"2023-12-22T06:50:24.767270Z"},"trusted":true},"execution_count":8,"outputs":[]},{"cell_type":"code","source":"from transformers import TrainingArguments\n\noutput_dir = \"./results\"\nper_device_train_batch_size = 2\ngradient_accumulation_steps = 8\noptim = \"paged_adamw_32bit\"\nsave_steps = 100\nlogging_steps = 10\nlearning_rate = 2e-4\nmax_grad_norm = 0.3\nmax_steps = 500\nwarmup_ratio = 0.03\nlr_scheduler_type = \"constant\"\n\ntraining_arguments = TrainingArguments(\n    output_dir=output_dir,\n    per_device_train_batch_size=per_device_train_batch_size,\n    gradient_accumulation_steps=gradient_accumulation_steps,\n    optim=optim,\n    save_steps=save_steps,\n    logging_steps=logging_steps,\n    learning_rate=learning_rate,\n    fp16=True,\n    max_grad_norm=max_grad_norm,\n    max_steps=max_steps,\n    warmup_ratio=warmup_ratio,\n    group_by_length=True,\n    lr_scheduler_type=lr_scheduler_type,\n    #gradient_checkpointing=True,\n)","metadata":{"execution":{"iopub.status.busy":"2023-12-22T06:50:24.769139Z","iopub.execute_input":"2023-12-22T06:50:24.769510Z","iopub.status.idle":"2023-12-22T06:50:24.791813Z","shell.execute_reply.started":"2023-12-22T06:50:24.769475Z","shell.execute_reply":"2023-12-22T06:50:24.790998Z"},"trusted":true},"execution_count":9,"outputs":[]},{"cell_type":"code","source":"from trl import SFTTrainer\n\nmax_seq_length = 256\n\ntrainer = SFTTrainer(\n    model=model,\n    train_dataset=dataset,\n    peft_config=peft_config,\n    dataset_text_field=\"text\",\n    max_seq_length=max_seq_length,\n    tokenizer=tokenizer,\n    args=training_arguments,\n)","metadata":{"execution":{"iopub.status.busy":"2023-12-22T06:50:24.794579Z","iopub.execute_input":"2023-12-22T06:50:24.794855Z","iopub.status.idle":"2023-12-22T06:50:55.410022Z","shell.execute_reply.started":"2023-12-22T06:50:24.794831Z","shell.execute_reply":"2023-12-22T06:50:55.409263Z"},"trusted":true},"execution_count":10,"outputs":[{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/trl/trainer/ppo_config.py:141: UserWarning: The `optimize_cuda_cache` arguement will be deprecated soon, please use `optimize_device_cache` instead.\n  warnings.warn(\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"Map:   0%|          | 0/9846 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"27108b47fe13413093f99386d738753b"}},"metadata":{}}]},{"cell_type":"code","source":"for name, module in trainer.model.named_modules():\n    if \"norm\" in name:\n        module = module.to(torch.float32)","metadata":{"execution":{"iopub.status.busy":"2023-12-22T06:50:55.411071Z","iopub.execute_input":"2023-12-22T06:50:55.411383Z","iopub.status.idle":"2023-12-22T06:50:55.420738Z","shell.execute_reply.started":"2023-12-22T06:50:55.411355Z","shell.execute_reply":"2023-12-22T06:50:55.419831Z"},"trusted":true},"execution_count":11,"outputs":[]},{"cell_type":"code","source":"trainer.train()","metadata":{"execution":{"iopub.status.busy":"2023-12-22T06:50:55.422057Z","iopub.execute_input":"2023-12-22T06:50:55.422439Z","iopub.status.idle":"2023-12-22T08:18:01.576858Z","shell.execute_reply.started":"2023-12-22T06:50:55.422406Z","shell.execute_reply":"2023-12-22T08:18:01.575904Z"},"trusted":true},"execution_count":12,"outputs":[{"name":"stderr","text":"\u001b[34m\u001b[1mwandb\u001b[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)\n\u001b[34m\u001b[1mwandb\u001b[0m: You can find your API key in your browser here: https://wandb.ai/authorize\n\u001b[34m\u001b[1mwandb\u001b[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:","output_type":"stream"},{"output_type":"stream","name":"stdin","text":"  路路路路路路路路路路路路路路路路路路路路路路路路路路路路路路路路路路路路路路路路\n"},{"name":"stderr","text":"\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"Tracking run with wandb version 0.16.1"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"Run data is saved locally in <code>/kaggle/working/wandb/run-20231222_065400-6154ymb5</code>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"Syncing run <strong><a href='https://wandb.ai/sriramya-toleti/huggingface/runs/6154ymb5' target=\"_blank\">wise-flower-5</a></strong> to <a href='https://wandb.ai/sriramya-toleti/huggingface' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":" View project at <a href='https://wandb.ai/sriramya-toleti/huggingface' target=\"_blank\">https://wandb.ai/sriramya-toleti/huggingface</a>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":" View run at <a href='https://wandb.ai/sriramya-toleti/huggingface/runs/6154ymb5' target=\"_blank\">https://wandb.ai/sriramya-toleti/huggingface/runs/6154ymb5</a>"},"metadata":{}},{"name":"stderr","text":"You're using a CodeGenTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"\n    <div>\n      \n      <progress value='500' max='500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n      [500/500 1:23:17, Epoch 0/1]\n    </div>\n    <table border=\"1\" class=\"dataframe\">\n  <thead>\n <tr style=\"text-align: left;\">\n      <th>Step</th>\n      <th>Training Loss</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>10</td>\n      <td>1.870800</td>\n    </tr>\n    <tr>\n      <td>20</td>\n      <td>1.792100</td>\n    </tr>\n    <tr>\n      <td>30</td>\n      <td>1.740000</td>\n    </tr>\n    <tr>\n      <td>40</td>\n      <td>1.685400</td>\n    </tr>\n    <tr>\n      <td>50</td>\n      <td>1.802200</td>\n    </tr>\n    <tr>\n      <td>60</td>\n      <td>1.746600</td>\n    </tr>\n    <tr>\n      <td>70</td>\n      <td>1.738900</td>\n    </tr>\n    <tr>\n      <td>80</td>\n      <td>1.655400</td>\n    </tr>\n    <tr>\n      <td>90</td>\n      <td>1.721500</td>\n    </tr>\n    <tr>\n      <td>100</td>\n      <td>1.799100</td>\n    </tr>\n    <tr>\n      <td>110</td>\n      <td>1.729200</td>\n    </tr>\n    <tr>\n      <td>120</td>\n      <td>1.652500</td>\n    </tr>\n    <tr>\n      <td>130</td>\n      <td>1.764100</td>\n    </tr>\n    <tr>\n      <td>140</td>\n      <td>1.717000</td>\n    </tr>\n    <tr>\n      <td>150</td>\n      <td>1.784400</td>\n    </tr>\n    <tr>\n      <td>160</td>\n      <td>1.754300</td>\n    </tr>\n    <tr>\n      <td>170</td>\n      <td>1.680400</td>\n    </tr>\n    <tr>\n      <td>180</td>\n      <td>1.644700</td>\n    </tr>\n    <tr>\n      <td>190</td>\n      <td>1.759300</td>\n    </tr>\n    <tr>\n      <td>200</td>\n      <td>1.768600</td>\n    </tr>\n    <tr>\n      <td>210</td>\n      <td>1.734300</td>\n    </tr>\n    <tr>\n      <td>220</td>\n      <td>1.636500</td>\n    </tr>\n    <tr>\n      <td>230</td>\n      <td>1.666700</td>\n    </tr>\n    <tr>\n      <td>240</td>\n      <td>1.691200</td>\n    </tr>\n    <tr>\n      <td>250</td>\n      <td>1.764700</td>\n    </tr>\n    <tr>\n      <td>260</td>\n      <td>1.702900</td>\n    </tr>\n    <tr>\n      <td>270</td>\n      <td>1.608000</td>\n    </tr>\n    <tr>\n      <td>280</td>\n      <td>1.680700</td>\n    </tr>\n    <tr>\n      <td>290</td>\n      <td>1.633000</td>\n    </tr>\n    <tr>\n      <td>300</td>\n      <td>1.885800</td>\n    </tr>\n    <tr>\n      <td>310</td>\n      <td>1.692800</td>\n    </tr>\n    <tr>\n      <td>320</td>\n      <td>1.706800</td>\n    </tr>\n    <tr>\n      <td>330</td>\n      <td>1.643200</td>\n    </tr>\n    <tr>\n      <td>340</td>\n      <td>1.739800</td>\n    </tr>\n    <tr>\n      <td>350</td>\n      <td>1.677900</td>\n    </tr>\n    <tr>\n      <td>360</td>\n      <td>1.675400</td>\n    </tr>\n    <tr>\n      <td>370</td>\n      <td>1.720200</td>\n    </tr>\n    <tr>\n      <td>380</td>\n      <td>1.616800</td>\n    </tr>\n    <tr>\n      <td>390</td>\n      <td>1.649500</td>\n    </tr>\n    <tr>\n      <td>400</td>\n      <td>1.859900</td>\n    </tr>\n    <tr>\n      <td>410</td>\n      <td>1.743400</td>\n    </tr>\n    <tr>\n      <td>420</td>\n      <td>1.699300</td>\n    </tr>\n    <tr>\n      <td>430</td>\n      <td>1.623900</td>\n    </tr>\n    <tr>\n      <td>440</td>\n      <td>1.644700</td>\n    </tr>\n    <tr>\n      <td>450</td>\n      <td>1.762400</td>\n    </tr>\n    <tr>\n      <td>460</td>\n      <td>1.602100</td>\n    </tr>\n    <tr>\n      <td>470</td>\n      <td>1.607500</td>\n    </tr>\n    <tr>\n      <td>480</td>\n      <td>1.666300</td>\n    </tr>\n    <tr>\n      <td>490</td>\n      <td>1.646800</td>\n    </tr>\n    <tr>\n      <td>500</td>\n      <td>1.766400</td>\n    </tr>\n  </tbody>\n</table><p>"},"metadata":{}},{"execution_count":12,"output_type":"execute_result","data":{"text/plain":"TrainOutput(global_step=500, training_loss=1.711106170654297, metrics={'train_runtime': 5222.3118, 'train_samples_per_second': 1.532, 'train_steps_per_second': 0.096, 'total_flos': 3.293667738832896e+16, 'train_loss': 1.711106170654297, 'epoch': 0.81})"},"metadata":{}}]},{"cell_type":"code","source":"from transformers import (\n    AutoModelForCausalLM,\n    AutoTokenizer,\n    BitsAndBytesConfig,\n    HfArgumentParser,\n    TrainingArguments,\n    pipeline,\n    logging,\n)","metadata":{"execution":{"iopub.status.busy":"2023-12-22T08:18:01.578105Z","iopub.execute_input":"2023-12-22T08:18:01.578425Z","iopub.status.idle":"2023-12-22T08:18:03.646153Z","shell.execute_reply.started":"2023-12-22T08:18:01.578398Z","shell.execute_reply":"2023-12-22T08:18:03.645130Z"},"trusted":true},"execution_count":13,"outputs":[]},{"cell_type":"code","source":"# Run text generation pipeline with our next model\nprompt = \"What is a large language model?\"\npipe = pipeline(task=\"text-generation\", model=model, tokenizer=tokenizer, max_length=200)\nresult = pipe(f\"### Human: {prompt}\",max_new_tokens=200)\nprint(result[0]['generated_text'])","metadata":{"execution":{"iopub.status.busy":"2023-12-22T08:18:03.647403Z","iopub.execute_input":"2023-12-22T08:18:03.647686Z","iopub.status.idle":"2023-12-22T08:18:20.906045Z","shell.execute_reply.started":"2023-12-22T08:18:03.647660Z","shell.execute_reply":"2023-12-22T08:18:20.904220Z"},"trusted":true},"execution_count":14,"outputs":[{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1518: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use and modify the model generation configuration (see https://huggingface.co/docs/transformers/generation_strategies#default-text-generation-configuration )\n  warnings.warn(\nBoth `max_new_tokens` (=200) and `max_length`(=200) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)\n","output_type":"stream"},{"name":"stdout","text":"### Human: What is a large language model?### Assistant: A large language model (LLM) is a type of artificial intelligence model that is trained on a massive amount of text data to learn the patterns and structures of natural language. These models are designed to be able to generate human-like text, and they are often used in applications such as language translation, text generation, and chatbots.\n\nLLMs are typically trained on large datasets of text, such as books, articles, and social media posts, and they use complex algorithms to learn the relationships between words and phrases. The more data that is available to train the model, the more accurate and useful it becomes.\n\nSome popular LLMs include GPT-3, which was trained on over 50 billion words of text, and BERT, which was trained on over 1.5 billion words of text. These models are capable of generating high-quality text that is often indistinguishable from that generated by a human.### Human: What are some applications of large language models\n","output_type":"stream"}]},{"cell_type":"code","source":"# Run text generation pipeline with our next model\nprompt = \"Explain about deep learning?\"\npipe = pipeline(task=\"text-generation\", model=model, tokenizer=tokenizer, max_length=200)\nresult = pipe(f\"{prompt}\",max_new_tokens=200)\nprint(result[0]['generated_text'])","metadata":{"execution":{"iopub.status.busy":"2023-12-22T08:47:47.055413Z","iopub.execute_input":"2023-12-22T08:47:47.056076Z","iopub.status.idle":"2023-12-22T08:48:04.378807Z","shell.execute_reply.started":"2023-12-22T08:47:47.056040Z","shell.execute_reply":"2023-12-22T08:48:04.377853Z"},"trusted":true},"execution_count":15,"outputs":[{"name":"stderr","text":"Both `max_new_tokens` (=200) and `max_length`(=200) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)\n","output_type":"stream"},{"name":"stdout","text":"Explain about deep learning?\n\nI am trying to understand deep learning. I have read about it, but I am still confused. Can you explain it to me in simple terms?\n\nThank you.\n\nReply:\n\nDeep learning is a type of artificial intelligence that uses neural networks to learn from data. Neural networks are made up of layers of interconnected nodes that process information. Deep learning algorithms are designed to learn from large amounts of data and can be used to solve complex problems.\n\nDeep learning is used in many applications, such as image recognition, natural language processing, and speech recognition. It is also used in self-driving cars and other autonomous systems.\n\nDeep learning is different from traditional machine learning because it uses more layers of nodes and can learn more complex patterns in the data. This allows it to make more accurate predictions and decisions.\n\nI hope this helps!\n\nBest regards,\n[Your Name]\n\nEmail 2:\n\nSubject: How does deep learning\n","output_type":"stream"}]},{"cell_type":"markdown","source":"## Inferecing by loading the adapter model","metadata":{}},{"cell_type":"code","source":"!ls /kaggle/working/results/checkpoint-500\ncheckpoint_path = \"/kaggle/working/results/checkpoint-500\"","metadata":{"execution":{"iopub.status.busy":"2023-12-22T08:51:47.743397Z","iopub.execute_input":"2023-12-22T08:51:47.743865Z","iopub.status.idle":"2023-12-22T08:51:48.740560Z","shell.execute_reply.started":"2023-12-22T08:51:47.743826Z","shell.execute_reply":"2023-12-22T08:51:48.739343Z"},"trusted":true},"execution_count":20,"outputs":[{"name":"stderr","text":"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n","output_type":"stream"},{"name":"stdout","text":"README.md\t\t   optimizer.pt\t\t    tokenizer_config.json\nadapter_config.json\t   rng_state.pth\t    trainer_state.json\nadapter_model.safetensors  scheduler.pt\t\t    training_args.bin\nadded_tokens.json\t   special_tokens_map.json  vocab.json\nmerges.txt\t\t   tokenizer.json\n","output_type":"stream"}]},{"cell_type":"code","source":"\nbnb_config = BitsAndBytesConfig(\n    load_in_4bit=True,\n    bnb_4bit_quant_type=\"nf4\",\n    bnb_4bit_compute_dtype=torch.float16,\n)\n\nmodel_inference = AutoModelForCausalLM.from_pretrained(\n    model_name,\n    quantization_config=bnb_config,    \n    trust_remote_code=True\n)\nmodel_inference.config.use_cache = False\nmodel_inference.load_adapter(checkpoint_path)","metadata":{"execution":{"iopub.status.busy":"2023-12-22T08:52:14.962648Z","iopub.execute_input":"2023-12-22T08:52:14.963476Z","iopub.status.idle":"2023-12-22T08:52:20.815271Z","shell.execute_reply.started":"2023-12-22T08:52:14.963435Z","shell.execute_reply":"2023-12-22T08:52:20.814300Z"},"trusted":true},"execution_count":21,"outputs":[{"output_type":"display_data","data":{"text/plain":"Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"4b84a434c15b45de8a4ab9ce3bb5ab94"}},"metadata":{}}]},{"cell_type":"code","source":"tokenizer_inference = AutoTokenizer.from_pretrained(checkpoint_path, trust_remote_code=True)\ntokenizer_inference.pad_token = tokenizer.eos_token","metadata":{"execution":{"iopub.status.busy":"2023-12-22T08:53:50.387145Z","iopub.execute_input":"2023-12-22T08:53:50.387934Z","iopub.status.idle":"2023-12-22T08:53:50.481442Z","shell.execute_reply.started":"2023-12-22T08:53:50.387899Z","shell.execute_reply":"2023-12-22T08:53:50.480497Z"},"trusted":true},"execution_count":22,"outputs":[{"name":"stderr","text":"Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n","output_type":"stream"}]},{"cell_type":"code","source":"# Run text generation pipeline with our next model\nprompt = \"what is the best place to visit in Europe?\"\npipe = pipeline(task=\"text-generation\", model=model_inference, tokenizer=tokenizer_inference, max_length=200)\nresult = pipe(f\"{prompt}\",max_new_tokens=200)\nprint(result[0]['generated_text'])","metadata":{"execution":{"iopub.status.busy":"2023-12-22T08:55:24.341009Z","iopub.execute_input":"2023-12-22T08:55:24.341876Z","iopub.status.idle":"2023-12-22T08:55:43.245950Z","shell.execute_reply.started":"2023-12-22T08:55:24.341842Z","shell.execute_reply":"2023-12-22T08:55:43.244939Z"},"trusted":true},"execution_count":24,"outputs":[{"name":"stderr","text":"Both `max_new_tokens` (=200) and `max_length`(=200) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)\n","output_type":"stream"},{"name":"stdout","text":"what is the best place to visit in Europe?\n\nI am a student and I am writing a paper on the best place to visit in Europe. I have narrowed it down to three options: Paris, Rome, and Barcelona. I am having trouble deciding which one to choose. Can you help me?\n\nThank you,\nJohn\n\nDear John,\n\nThank you for reaching out to me. I am happy to help you with your paper on the best place to visit in Europe.\n\nParis, Rome, and Barcelona are all great options, each with their own unique attractions and experiences. Here are some things to consider when deciding which one to choose:\n\n1. Budget: Consider your budget and how much you are willing to spend on travel, accommodation, food, and activities.\n\n2. Interests: Think about what you are interested in seeing and doing. Paris is known for its art and culture, Rome for its history and architecture, and Barcelona for its beaches and nightlife.\n\n3\n","output_type":"stream"}]},{"cell_type":"code","source":"# Run text generation pipeline with our next model - using ####human in prompt\nprompt = \"what is the best place to visit in Europe?\"\npipe = pipeline(task=\"text-generation\", model=model_inference, tokenizer=tokenizer_inference, max_length=200)\nresult = pipe(f\"### Human:{prompt}\",max_new_tokens=200)\nprint(result[0]['generated_text'])","metadata":{"execution":{"iopub.status.busy":"2023-12-22T08:57:07.401975Z","iopub.execute_input":"2023-12-22T08:57:07.402456Z","iopub.status.idle":"2023-12-22T08:57:26.144698Z","shell.execute_reply.started":"2023-12-22T08:57:07.402405Z","shell.execute_reply":"2023-12-22T08:57:26.142753Z"},"trusted":true},"execution_count":25,"outputs":[{"name":"stderr","text":"Both `max_new_tokens` (=200) and `max_length`(=200) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)\n","output_type":"stream"},{"name":"stdout","text":"### Human:what is the best place to visit in Europe?### Assistant: The best place to visit in Europe depends on your interests and preferences. Here are some popular destinations:\n\n1. Paris, France: Known for its romantic atmosphere, iconic landmarks like the Eiffel Tower, and world-class cuisine.\n\n2. Rome, Italy: A city rich in history, art, and culture, with famous attractions like the Colosseum, Vatican City, and the Trevi Fountain.\n\n3. Barcelona, Spain: A vibrant city with stunning architecture, beautiful beaches, and a lively nightlife.\n\n4. Amsterdam, Netherlands: A charming city with canals, museums, and a vibrant nightlife.\n\n5. Prague, Czech Republic: A city with a rich history, stunning architecture, and a vibrant nightlife.\n\n6. Vienna, Austria: A city with a rich cultural heritage, beautiful architecture, and a vibrant nightlife.\n\n7. London, United Kingdom: A city with a\n","output_type":"stream"}]}]}