riczhou committed
Commit ff21671
Parent: 49d22b7

Initial commit

logs.txt ADDED
@@ -0,0 +1,105 @@
+ /opt/conda/envs/py310/bin/python -m mlc_llm gen_config /models/Mixtral-8x7B-Instruct-v0.1 --quantization q4f32_1 --conv-template mistral_default --output /models/mlc-delivery/hf/mlc-ai/Mixtral-8x7B-Instruct-v0.1-q4f32_1-MLC
+ [2024-06-06 22:21:44] INFO auto_config.py:116: Found model configuration: /models/Mixtral-8x7B-Instruct-v0.1/config.json
+ [2024-06-06 22:21:44] INFO auto_config.py:154: Found model type: mixtral. Use `--model-type` to override.
+ [2024-06-06 22:21:44] INFO llama_model.py:52: context_window_size not found in config.json. Falling back to max_position_embeddings (32768)
+ [2024-06-06 22:21:44] INFO llama_model.py:72: prefill_chunk_size defaults to 2048
+ [2024-06-06 22:21:44] INFO config.py:107: Overriding max_batch_size from 1 to 80
+ [2024-06-06 22:21:44] INFO gen_config.py:143: [generation_config.json] Setting bos_token_id: 1
+ [2024-06-06 22:21:44] INFO gen_config.py:143: [generation_config.json] Setting eos_token_id: 2
+ [2024-06-06 22:21:44] INFO gen_config.py:155: Found tokenizer config: /models/Mixtral-8x7B-Instruct-v0.1/tokenizer.model. Copying to /models/mlc-delivery/hf/mlc-ai/Mixtral-8x7B-Instruct-v0.1-q4f32_1-MLC/tokenizer.model
+ [2024-06-06 22:21:44] INFO gen_config.py:155: Found tokenizer config: /models/Mixtral-8x7B-Instruct-v0.1/tokenizer.json. Copying to /models/mlc-delivery/hf/mlc-ai/Mixtral-8x7B-Instruct-v0.1-q4f32_1-MLC/tokenizer.json
+ [2024-06-06 22:21:44] INFO gen_config.py:157: Not found tokenizer config: /models/Mixtral-8x7B-Instruct-v0.1/vocab.json
+ [2024-06-06 22:21:44] INFO gen_config.py:157: Not found tokenizer config: /models/Mixtral-8x7B-Instruct-v0.1/merges.txt
+ [2024-06-06 22:21:44] INFO gen_config.py:157: Not found tokenizer config: /models/Mixtral-8x7B-Instruct-v0.1/added_tokens.json
+ [2024-06-06 22:21:44] INFO gen_config.py:155: Found tokenizer config: /models/Mixtral-8x7B-Instruct-v0.1/tokenizer_config.json. Copying to /models/mlc-delivery/hf/mlc-ai/Mixtral-8x7B-Instruct-v0.1-q4f32_1-MLC/tokenizer_config.json
+ [2024-06-06 22:21:44] INFO gen_config.py:216: Detected tokenizer info: {'token_postproc_method': 'byte_fallback', 'prepend_space_in_encode': True, 'strip_space_in_decode': True}
+ [2024-06-06 22:21:44] INFO gen_config.py:32: [System default] Setting pad_token_id: 0
+ [2024-06-06 22:21:44] INFO gen_config.py:32: [System default] Setting temperature: 1.0
+ [2024-06-06 22:21:44] INFO gen_config.py:32: [System default] Setting presence_penalty: 0.0
+ [2024-06-06 22:21:44] INFO gen_config.py:32: [System default] Setting frequency_penalty: 0.0
+ [2024-06-06 22:21:44] INFO gen_config.py:32: [System default] Setting repetition_penalty: 1.0
+ [2024-06-06 22:21:44] INFO gen_config.py:32: [System default] Setting top_p: 1.0
+ [2024-06-06 22:21:44] INFO gen_config.py:223: Dumping configuration file to: /models/mlc-delivery/hf/mlc-ai/Mixtral-8x7B-Instruct-v0.1-q4f32_1-MLC/mlc-chat-config.json
+ /opt/conda/envs/py310/bin/python -m mlc_llm convert_weight /models/Mixtral-8x7B-Instruct-v0.1 --quantization q4f32_1 --output /models/mlc-delivery/hf/mlc-ai/Mixtral-8x7B-Instruct-v0.1-q4f32_1-MLC
+ [2024-06-06 22:21:46] INFO auto_config.py:116: Found model configuration: /models/Mixtral-8x7B-Instruct-v0.1/config.json
+ [2024-06-06 22:21:47] INFO auto_device.py:79: Found device: cuda:0
+ [2024-06-06 22:21:49] INFO auto_device.py:88: Not found device: rocm:0
+ [2024-06-06 22:21:50] INFO auto_device.py:88: Not found device: metal:0
+ [2024-06-06 22:21:52] INFO auto_device.py:79: Found device: vulkan:0
+ [2024-06-06 22:21:52] INFO auto_device.py:79: Found device: vulkan:1
+ [2024-06-06 22:21:52] INFO auto_device.py:79: Found device: vulkan:2
+ [2024-06-06 22:21:52] INFO auto_device.py:79: Found device: vulkan:3
+ [2024-06-06 22:21:53] INFO auto_device.py:88: Not found device: opencl:0
+ [2024-06-06 22:21:53] INFO auto_device.py:35: Using device: cuda:0
+ [2024-06-06 22:21:53] INFO auto_weight.py:71: Finding weights in: /models/Mixtral-8x7B-Instruct-v0.1
+ [2024-06-06 22:21:53] INFO auto_weight.py:137: Not found Huggingface PyTorch
+ [2024-06-06 22:21:53] INFO auto_weight.py:144: Found source weight format: huggingface-safetensor. Source configuration: /models/Mixtral-8x7B-Instruct-v0.1/model.safetensors.index.json
+ [2024-06-06 22:21:53] INFO auto_weight.py:107: Using source weight configuration: /models/Mixtral-8x7B-Instruct-v0.1/model.safetensors.index.json. Use `--source` to override.
+ [2024-06-06 22:21:53] INFO auto_weight.py:111: Using source weight format: huggingface-safetensor. Use `--source-format` to override.
+ [2024-06-06 22:21:53] INFO auto_config.py:154: Found model type: mixtral. Use `--model-type` to override.
+ [2024-06-06 22:21:53] INFO llama_model.py:52: context_window_size not found in config.json. Falling back to max_position_embeddings (32768)
+ [2024-06-06 22:21:53] INFO llama_model.py:72: prefill_chunk_size defaults to 2048
+ Weight conversion with arguments:
+   --config /models/Mixtral-8x7B-Instruct-v0.1/config.json
+   --quantization GroupQuantize(name='q4f32_1', kind='group-quant', group_size=32, quantize_dtype='int4', storage_dtype='uint32', model_dtype='float32', linear_weight_layout='NK', quantize_embedding=True, quantize_final_fc=True, num_elem_per_storage=8, num_storage_per_group=4, max_int_value=7)
+   --model-type mixtral
+   --device cuda:0
+   --source /models/Mixtral-8x7B-Instruct-v0.1/model.safetensors.index.json
+   --source-format huggingface-safetensor
+   --output /models/mlc-delivery/hf/mlc-ai/Mixtral-8x7B-Instruct-v0.1-q4f32_1-MLC
+ Start storing to cache /models/mlc-delivery/hf/mlc-ai/Mixtral-8x7B-Instruct-v0.1-q4f32_1-MLC
+   0%| | 0/227 [00:00<?, ?it/s]
+   0%| | 0/227 [00:00<?, ?it/s]
+   0%| | 0/227 [00:05<?, ?it/s]
+   0%| | 0/227 [00:06<?, ?it/s]
+   0%| | 0/227 [00:06<?, ?it/s]
+   0%| | 1/227 [00:06<23:41, 6.29s/it]
+   0%| | 1/227 [00:06<23:41, 6.29s/it]
+   0%| | 1/227 [00:18<23:41, 6.29s/it]
+   0%| | 1/227 [00:18<23:41, 6.29s/it]
+   0%| | 1/227 [00:19<23:41, 6.29s/it]
+   1%| | 2/227 [00:20<40:15, 10.74s/it]
+   1%| | 2/227 [00:20<40:15, 10.74s/it]
+   1%| | 2/227 [00:21<40:15, 10.74s/it]
+   1%| | 2/227 [00:21<40:15, 10.74s/it]
+   1%|▏ | 3/227 [00:22<24:57, 6.68s/it]
+   1%|▏ | 3/227 [00:22<24:57, 6.68s/it]
+   1%|▏ | 3/227 [00:22<24:57, 6.68s/it]
+   2%|▏ | 5/227 [00:29<21:48, 5.89s/it]
+ Traceback (most recent call last):
+   File "/opt/conda/envs/py310/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+     return _run_code(code, main_globals, None,
+   File "/opt/conda/envs/py310/lib/python3.10/runpy.py", line 86, in _run_code
+     exec(code, run_globals)
+   File "/opt/conda/envs/py310/lib/python3.10/site-packages/mlc_llm/__main__.py", line 64, in <module>
+     main()
+   File "/opt/conda/envs/py310/lib/python3.10/site-packages/mlc_llm/__main__.py", line 37, in main
+     cli.main(sys.argv[2:])
+   File "/opt/conda/envs/py310/lib/python3.10/site-packages/mlc_llm/cli/convert_weight.py", line 88, in main
+     convert_weight(
+   File "/opt/conda/envs/py310/lib/python3.10/site-packages/mlc_llm/interface/convert_weight.py", line 181, in convert_weight
+     _convert_args(args)
+   File "/opt/conda/envs/py310/lib/python3.10/site-packages/mlc_llm/interface/convert_weight.py", line 145, in _convert_args
+     tvmjs.dump_ndarray_cache(
+   File "/opt/conda/envs/py310/lib/python3.10/site-packages/tvm/contrib/tvmjs.py", line 272, in dump_ndarray_cache
+     for k, origin_v in param_generator:
+   File "/opt/conda/envs/py310/lib/python3.10/site-packages/mlc_llm/interface/convert_weight.py", line 129, in _param_generator
+     for name, param in loader.load(device=args.device, preshard_funcs=preshard_funcs):
+   File "/opt/conda/envs/py310/lib/python3.10/site-packages/mlc_llm/loader/huggingface_loader.py", line 118, in load
+     param = self._load_mlc_param(mlc_name, device=device)
+   File "/opt/conda/envs/py310/lib/python3.10/site-packages/mlc_llm/loader/huggingface_loader.py", line 157, in _load_mlc_param
+     return as_ndarray(param, device=device)
+   File "/opt/conda/envs/py310/lib/python3.10/site-packages/tvm/runtime/ndarray.py", line 675, in array
+     return empty(arr.shape, arr.dtype, device, mem_scope).copyfrom(arr)
+   File "/opt/conda/envs/py310/lib/python3.10/site-packages/tvm/runtime/ndarray.py", line 431, in empty
+     arr = _ffi_api.TVMArrayAllocWithScope(shape, dtype, device, mem_scope)
+   File "tvm/_ffi/_cython/./packed_func.pxi", line 332, in tvm._ffi._cy3.core.PackedFuncBase.__call__
+   File "tvm/_ffi/_cython/./packed_func.pxi", line 277, in tvm._ffi._cy3.core.FuncCall
+   File "tvm/_ffi/_cython/./base.pxi", line 182, in tvm._ffi._cy3.core.CHECK_CALL
+   File "/opt/conda/envs/py310/lib/python3.10/site-packages/tvm/_ffi/base.py", line 481, in raise_last_ffi_error
+     raise py_err
+ tvm.error.InternalError: Traceback (most recent call last):
+   5: _ZN3tvm7runtime13PackedFun
+   4: tvm::runtime::TypedPackedFunc<tvm::runtime::NDArray (tvm::runtime::ShapeTuple, DLDataType, DLDevice, tvm::runtime::Optional<tvm::runtime::String>)>::AssignTypedLambda<tvm::runtime::NDArray (*)(tvm::runtime::ShapeTuple, DLDataType, DLDevice, tvm::runtime::Optional<tvm::runtime::String>)>(tvm::runtime::NDArray (*)(tvm::runtime::ShapeTuple, DLDataType, DLDevice, tvm::runtime::Optional<tvm::runtime::String>), std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >)::{lambda(tvm::runtime::TVMArgs const&, tvm::runtime::TVMRetValue*)#1}::operator()(tvm::runtime::TVMArgs const&, tvm::runtime::TVMRetValue*) const
+   3: tvm::runtime::NDArray::Empty(tvm::runtime::ShapeTuple, DLDataType, DLDevice, tvm::runtime::Optional<tvm::runtime::String>)
+   2: tvm::runtime::DeviceAPI::AllocDataSpace(DLDevice, int, long const*, DLDataType, tvm::runtime::Optional<tvm::runtime::String>)
+   1: tvm::runtime::CUDADeviceAPI::AllocDataSpace(DLDevice, unsigned long, unsigned long, DLDataType)
+   0: _ZN3tvm7runtime6deta
+   File "/workspace/tvm/src/runtime/cuda/cuda_device_api.cc", line 145
+ InternalError: Check failed: (e == cudaSuccess || e == cudaErrorCudartUnloading) is false: CUDA: out of memory
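
Note: the log above ends with the convert_weight step failing on a CUDA out-of-memory error while a parameter is being materialized on cuda:0. As a minimal sketch (it assumes PyTorch is importable in the same conda environment; the code is illustrative and not taken from this log), one way to check how much device memory is actually free before re-running the conversion:

```python
import torch

def report_free_vram(device_index: int = 0) -> None:
    """Print free vs. total memory for one CUDA device, in GiB."""
    free_bytes, total_bytes = torch.cuda.mem_get_info(device_index)
    gib = 1024 ** 3
    print(f"cuda:{device_index}: {free_bytes / gib:.1f} GiB free "
          f"of {total_bytes / gib:.1f} GiB total")

if __name__ == "__main__":
    if torch.cuda.is_available():
        for idx in range(torch.cuda.device_count()):
            report_free_vram(idx)
    else:
        print("No CUDA device visible to PyTorch.")
```
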
mlc-chat-config.json ADDED
@@ -0,0 +1,79 @@
+ {
+   "version": "0.1.0",
+   "model_type": "mixtral",
+   "quantization": "q4f32_1",
+   "model_config": {
+     "hidden_size": 4096,
+     "intermediate_size": 14336,
+     "num_attention_heads": 32,
+     "num_hidden_layers": 32,
+     "rms_norm_eps": 1e-05,
+     "vocab_size": 32000,
+     "position_embedding_base": 1000000.0,
+     "context_window_size": 32768,
+     "prefill_chunk_size": 2048,
+     "num_key_value_heads": 8,
+     "head_dim": 128,
+     "tensor_parallel_shards": 1,
+     "max_batch_size": 80,
+     "num_local_experts": 8,
+     "num_experts_per_tok": 2
+   },
+   "vocab_size": 32000,
+   "context_window_size": 32768,
+   "sliding_window_size": -1,
+   "prefill_chunk_size": 2048,
+   "attention_sink_size": -1,
+   "tensor_parallel_shards": 1,
+   "temperature": 1.0,
+   "presence_penalty": 0.0,
+   "frequency_penalty": 0.0,
+   "repetition_penalty": 1.0,
+   "top_p": 1.0,
+   "tokenizer_files": [
+     "tokenizer.model",
+     "tokenizer.json",
+     "tokenizer_config.json"
+   ],
+   "tokenizer_info": {
+     "token_postproc_method": "byte_fallback",
+     "prepend_space_in_encode": true,
+     "strip_space_in_decode": true
+   },
+   "conv_template": {
+     "name": "mistral_default",
+     "system_template": "[INST] {system_message}",
+     "system_message": "Always assist with care, respect, and truth. Respond with utmost utility yet securely. Avoid harmful, unethical, prejudiced, or negative content. Ensure replies promote fairness and positivity.",
+     "system_prefix_token_ids": [
+       1
+     ],
+     "add_role_after_system_message": false,
+     "roles": {
+       "user": "[INST]",
+       "assistant": "[/INST]",
+       "tool": "[INST]"
+     },
+     "role_templates": {
+       "user": "{user_message}",
+       "assistant": "{assistant_message}",
+       "tool": "{tool_message}"
+     },
+     "messages": [],
+     "seps": [
+       " "
+     ],
+     "role_content_sep": " ",
+     "role_empty_sep": "",
+     "stop_str": [
+       "</s>"
+     ],
+     "stop_token_ids": [
+       2
+     ],
+     "function_string": "",
+     "use_function_calling": false
+   },
+   "pad_token_id": 0,
+   "bos_token_id": 1,
+   "eos_token_id": 2
+ }
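
Note: for reference, a minimal sketch of reading the generated mlc-chat-config.json from Python and inspecting a few of the fields shown above. The local path is illustrative, not something this commit ships.

```python
import json
from pathlib import Path

# Illustrative path; point this at wherever the MLC output directory was placed.
config_path = Path("Mixtral-8x7B-Instruct-v0.1-q4f32_1-MLC/mlc-chat-config.json")

with config_path.open(encoding="utf-8") as f:
    cfg = json.load(f)

print(cfg["model_type"], cfg["quantization"])                  # mixtral q4f32_1
print(cfg["context_window_size"], cfg["prefill_chunk_size"])   # 32768 2048
print(cfg["conv_template"]["name"])                            # mistral_default
print(len(cfg["tokenizer_files"]))                             # 3
```
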
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2139fc39441ae69ecf90ef1a4407a1e53bdea162ed6134483a07a131af0a290c
+ size 65536000
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3dc50ed18a9ea66cf35d2c494df597a1bd33bd26429b4ea941bb6cc0e29e0c78
+ size 469762048
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:00b376256f2cca2233ff824f675d7e4468f41f570bc63cb0b4c02ebe19b5eaed
+ size 58720256
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1f7e75d17b23f2e874301f25e6285650fed5503e5a1d5adf755f59e98a70e777
+ size 234881024
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:261604710e332abd50bb8434b9beda23309083d78840e2732ac17a18bdcc35a5
+ size 29360128
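
Note: each params_shard_*.bin entry above is a Git LFS pointer, a three-line text stub holding the spec version, the sha256 oid, and the payload size in bytes. A minimal sketch for parsing such a pointer (assuming the file on disk is still the text stub rather than the fetched binary):

```python
from pathlib import Path

def read_lfs_pointer(path: Path) -> dict[str, str]:
    """Split a Git LFS pointer file into its 'version', 'oid', and 'size' fields."""
    fields = {}
    for line in path.read_text(encoding="utf-8").splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

pointer = read_lfs_pointer(Path("params_shard_0.bin"))
print(pointer["oid"])        # e.g. sha256:2139fc39...
print(int(pointer["size"]))  # 65536000 (bytes)
```
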
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+ size 493443
tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [],
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "legacy": true,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": null,
+   "sp_model_kwargs": {},
+   "spaces_between_special_tokens": false,
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false,
+   "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}"
+ }
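
Note: the chat_template above is the standard Mistral-instruct Jinja template: user turns are wrapped in [INST] ... [/INST] and each assistant turn is closed with the EOS token. A minimal sketch of rendering it, assuming the Hugging Face transformers library is installed and the tokenizer files from this commit are available locally or under the repo id that appears in the logs:

```python
from transformers import AutoTokenizer

# Repo id taken from the output path in logs.txt; a local directory holding the
# tokenizer files from this commit would work the same way.
tok = AutoTokenizer.from_pretrained("mlc-ai/Mixtral-8x7B-Instruct-v0.1-q4f32_1-MLC")

messages = [
    {"role": "user", "content": "Hello!"},
    {"role": "assistant", "content": "Hi there."},
    {"role": "user", "content": "What does q4f32_1 mean?"},
]

# Per the template above, this renders roughly:
# <s>[INST] Hello! [/INST]Hi there.</s>[INST] What does q4f32_1 mean? [/INST]
print(tok.apply_chat_template(messages, tokenize=False))
```
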