{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "d00601c9", "metadata": {}, "outputs": [], "source": [ "import logging\n", "\n", "# Root logger stays at DEBUG: the quantization step reports progress there\n", "# (e.g. ' * Split into N blocks'). The HTTP / file-lock / filesystem libraries\n", "# are raised to WARNING so their per-request DEBUG chatter (which also prints\n", "# local cache paths) does not bury the notebook output.\n", "logging.basicConfig(level=logging.DEBUG)\n", "for noisy_logger in (\"urllib3\", \"filelock\", \"fsspec\"):\n", "    logging.getLogger(noisy_logger).setLevel(logging.WARNING)" ] }, { "cell_type": "code", "execution_count": 2, "id": "a8d52aa0", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:numexpr.utils:Note: NumExpr detected 24 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n", "INFO:numexpr.utils:NumExpr defaulting to 8 threads.\n", "INFO:datasets:PyTorch version 2.2.1+cu118 available.\n" ] } ], "source": [ "# All imports live in this one cell so a fresh kernel can Run All top to bottom.\n", "from transformers import AutoModelForCausalLM, AutoTokenizer\n", "from datasets import load_dataset\n", "import torch\n", "from awq import AutoAWQForCausalLM" ] }, { "cell_type": "code", "execution_count": 3, "id": "ab513a4e", "metadata": {}, "outputs": [], "source": [ "# Configuration read by the later cells: source checkpoint, output directory,\n", "# and the AWQ settings handed to model.quantize().\n", "model_path = 'llava-hf/llava-v1.6-vicuna-13b-hf'\n", "quant_path = './llava-v1.6-vicuna-13b-hf-awq'\n", "quant_config = { \"zero_point\": True, \"q_group_size\": 128, \"w_bit\": 4, \"version\": \"GEMM\" }" ] }, { "cell_type": "code", "execution_count": 5, "id": "41d1869f", "metadata": { "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/ubuntu/.local/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443\n", "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /llava-hf/llava-v1.6-vicuna-13b-hf/resolve/main/config.json HTTP/1.1\" 200 0\n", "DEBUG:filelock:Attempting to acquire lock 140170094142960 on /home/ubuntu/.cache/huggingface/hub/.locks/models--llava-hf--llava-v1.6-vicuna-13b-hf/e5a59fb69b73666871f8db1853e3bb796aa2b340.lock\n", "DEBUG:filelock:Lock 140170094142960 acquired on /home/ubuntu/.cache/huggingface/hub/.locks/models--llava-hf--llava-v1.6-vicuna-13b-hf/e5a59fb69b73666871f8db1853e3bb796aa2b340.lock\n", "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /llava-hf/llava-v1.6-vicuna-13b-hf/resolve/main/config.json HTTP/1.1\" 200 1341\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "303d81955f424d5487d1fd5af1bbba12", "version_major": 2, "version_minor": 0 }, "text/plain": [ "config.json: 0%| | 0.00/1.34k [00:00 30]" ] }, { "cell_type": "code", "execution_count": 7, "id": "5dcf2167", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /api/datasets/wikitext HTTP/1.1\" 200 4846\n", "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): s3.amazonaws.com:443\n", "DEBUG:urllib3.connectionpool:https://s3.amazonaws.com:443 \"HEAD /datasets.huggingface.co/datasets/datasets/wikitext/wikitext.py HTTP/1.1\" 200 0\n", "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /api/datasets/wikitext HTTP/1.1\" 200 4846\n", "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443\n", "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /datasets/wikitext/resolve/b08601e04326c79dfdd32d625aee71d232d685c3/README.md HTTP/1.1\" 200 0\n", "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): 
huggingface.co:443\n", "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /datasets/wikitext/resolve/b08601e04326c79dfdd32d625aee71d232d685c3/.huggingface.yaml HTTP/1.1\" 404 0\n", "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): datasets-server.huggingface.co:443\n", "DEBUG:urllib3.connectionpool:https://datasets-server.huggingface.co:443 \"GET /info?dataset=wikitext HTTP/1.1\" 200 None\n", "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /api/datasets/wikitext/revision/b08601e04326c79dfdd32d625aee71d232d685c3 HTTP/1.1\" 200 4846\n", "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /api/datasets/wikitext/tree/b08601e04326c79dfdd32d625aee71d232d685c3/wikitext-103-raw-v1?recursive=False&expand=False HTTP/1.1\" 200 1017\n", "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443\n", "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /api/datasets/wikitext/revision/b08601e04326c79dfdd32d625aee71d232d685c3 HTTP/1.1\" 200 4846\n", "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443\n", "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /datasets/wikitext/resolve/b08601e04326c79dfdd32d625aee71d232d685c3/dataset_infos.json HTTP/1.1\" 404 0\n", "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /api/datasets/wikitext/tree/b08601e04326c79dfdd32d625aee71d232d685c3/wikitext-2-raw-v1?recursive=False&expand=False HTTP/1.1\" 200 751\n", "DEBUG:filelock:Attempting to acquire lock 140170024048176 on /home/ubuntu/.cache/huggingface/datasets/_home_ubuntu_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock\n", "DEBUG:filelock:Lock 140170024048176 acquired on /home/ubuntu/.cache/huggingface/datasets/_home_ubuntu_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock\n", "DEBUG:fsspec.local:open file: 
/home/ubuntu/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3/dataset_info.json\n", "DEBUG:filelock:Attempting to release lock 140170024048176 on /home/ubuntu/.cache/huggingface/datasets/_home_ubuntu_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock\n", "DEBUG:filelock:Lock 140170024048176 released on /home/ubuntu/.cache/huggingface/datasets/_home_ubuntu_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock\n", "DEBUG:filelock:Attempting to acquire lock 140174458159984 on /home/ubuntu/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3_builder.lock\n", "DEBUG:filelock:Lock 140174458159984 acquired on /home/ubuntu/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3_builder.lock\n", "DEBUG:fsspec.local:open file: /home/ubuntu/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3/dataset_info.json\n", "DEBUG:filelock:Attempting to release lock 140174458159984 on /home/ubuntu/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3_builder.lock\n", "DEBUG:filelock:Lock 140174458159984 released on /home/ubuntu/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3_builder.lock\n", "DEBUG:root: * Split into 47 blocks\n", "AWQ: 100%|██████████| 40/40 [16:43<00:00, 25.09s/it]\n" ] } ], "source": [ "model.quantize(tokenizer, quant_config=quant_config, calib_data=load_wikitext())" ] }, { "cell_type": "code", "execution_count": 8, "id": "fa16f58f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2024-05-28 11:40:44,136] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n" ] } ], "source": [ 
"model.save_quantized(quant_path)" ] }, { "cell_type": "code", "execution_count": 9, "id": "7f8083da", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "('./llava-v1.6-vicuna-13b-hf-awq/tokenizer_config.json',\n", " './llava-v1.6-vicuna-13b-hf-awq/special_tokens_map.json',\n", " './llava-v1.6-vicuna-13b-hf-awq/tokenizer.model',\n", " './llava-v1.6-vicuna-13b-hf-awq/added_tokens.json',\n", " './llava-v1.6-vicuna-13b-hf-awq/tokenizer.json')" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tokenizer.save_pretrained(quant_path)" ] }, { "cell_type": "code", "execution_count": 11, "id": "840e775b", "metadata": { "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "DEBUG:urllib3.connectionpool:Resetting dropped connection: huggingface.co\n", "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"POST /api/repos/create HTTP/1.1\" 200 153\n", "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /mesolitica/llava-v1.6-vicuna-13b-hf-awq/resolve/main/README.md HTTP/1.1\" 404 0\n", "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"POST /api/validate-yaml HTTP/1.1\" 200 27\n", "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"POST /api/models/mesolitica/llava-v1.6-vicuna-13b-hf-awq/preupload/main HTTP/1.1\" 200 442\n", "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"POST /mesolitica/llava-v1.6-vicuna-13b-hf-awq.git/info/lfs/objects/batch HTTP/1.1\" 200 917\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "78149f8211c0456bada0b8e7a7b06038", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer.model: 0%| | 0.00/500k [00:00