{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "da47e672", "metadata": {}, "outputs": [], "source": [ "# !pip3 install https://github.com/casper-hansen/AutoAWQ/releases/download/v0.1.6/autoawq-0.1.6+cu118-cp310-cp310-linux_x86_64.whl" ] }, { "cell_type": "code", "execution_count": 2, "id": "27063032", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Tue Nov 7 13:34:15 2023 \r\n", "+-----------------------------------------------------------------------------+\r\n", "| NVIDIA-SMI 525.85.12 Driver Version: 525.85.12 CUDA Version: 12.0 |\r\n", "|-------------------------------+----------------------+----------------------+\r\n", "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\r\n", "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\r\n", "| | | MIG M. |\r\n", "|===============================+======================+======================|\r\n", "| 0 NVIDIA A100 80G... On | 00000001:00:00.0 Off | 0 |\r\n", "| N/A 37C P0 65W / 300W | 5536MiB / 81920MiB | 0% Default |\r\n", "| | | Disabled |\r\n", "+-------------------------------+----------------------+----------------------+\r\n", " \r\n", "+-----------------------------------------------------------------------------+\r\n", "| Processes: |\r\n", "| GPU GI CI PID Type Process name GPU Memory |\r\n", "| ID ID Usage |\r\n", "|=============================================================================|\r\n", "+-----------------------------------------------------------------------------+\r\n" ] } ], "source": [ "!nvidia-smi" ] }, { "cell_type": "code", "execution_count": 20, "id": "1bde5916", "metadata": { "scrolled": true }, "outputs": [], "source": [ "from awq import AutoAWQForCausalLM\n", "from transformers import AutoConfig, AwqConfig, AutoTokenizer, AutoModelForCausalLM\n", "import torch\n", "\n", "model_path = 'mesolitica/malaysian-mistral-7b-32k-instructions'" ] }, { "cell_type": "code", "execution_count": 4, "id": "c658280e", "metadata": {}, "outputs": [], "source": [ "# !pip3 install transformers==4.35.0" ] }, { "cell_type": "code", "execution_count": 6, "id": "838ddb85", "metadata": {}, "outputs": [], "source": [ "# model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype = torch.bfloat16)" ] }, { "cell_type": "code", "execution_count": 7, "id": "637b41e1", "metadata": {}, "outputs": [], "source": [ "# model.save_pretrained('./test', safe_serialization = False)" ] }, { "cell_type": "code", "execution_count": 9, "id": "417dbbf5", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "61e83560a1344a4593dd8e2d806992c4", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Loading checkpoint shards: 0%| | 0/3 [00:00']\n", " for u, a in zip(users, assistants):\n", " texts.append(f'[INST] {u.strip()} [/INST]{a.strip()} ')\n", "\n", " texts.append(f'[INST] {user_query.strip()} [/INST]')\n", " prompt = ''.join(texts).strip()\n", " return prompt" ] }, { "cell_type": "code", "execution_count": 47, "id": "63315893", "metadata": {}, "outputs": [], "source": [ "messages = [\n", " {'role': 'user', 'content': 'kwsp tu apa'}\n", "]\n", "prompt = parse_mistral_chat(messages)\n", "inputs = tokenizer([prompt], return_tensors='pt', add_special_tokens=False).to('cuda')" ] }, { "cell_type": "code", "execution_count": 49, "id": "8a3c15d8", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 2.67 s, sys: 0 ns, total: 2.67 s\n", "Wall time: 2.67 s\n" ] }, { "data": { "text/plain": [ "' [INST] kwsp tu apa [/INST]kwsp merujuk kepada Kumpulan Wang Simpanan Pekerja, iaitu sebuah organisasi simpanan persaraan yang ditubuhkan oleh kerajaan Malaysia untuk melindungi dan menyediakan simpanan untuk pekerja-pekerja sektor swasta pada akhir penggajian mereka.'" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "\n", "generate_kwargs = dict(\n", " inputs,\n", " max_new_tokens=1024,\n", " top_p=0.95,\n", " top_k=50,\n", " temperature=0.9,\n", " do_sample=True,\n", " num_beams=1,\n", ")\n", "r = quantized_model.generate(**generate_kwargs)\n", "tokenizer.decode(r[0])" ] }, { "cell_type": "code", "execution_count": null, "id": "d73d43a0", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 5 }