gatepoet committed on
Commit
c9f427f
1 Parent(s): 9365ea8

Initial commit

Browse files
Files changed (5) hide show
  1. logs.txt +111 -0
  2. mlc-chat-config.json +71 -0
  3. tokenizer.json +0 -0
  4. tokenizer.model +3 -0
  5. tokenizer_config.json +37 -0
logs.txt ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/195 [00:00<?, ?it/s]
1
 
 
2
  0%| | 0/195 [00:00<?, ?it/s]
3
  0%| | 0/195 [00:00<?, ?it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /home/floriadmin/miniforge3/envs/mlc/bin/python -m mlc_llm gen_config ../dist/models/ToolLLaMA-2-7b-v2 --quantization q4f32_1 --conv-template llama-2 --output /tmp/tmpxjsa38do --tensor-parallel-shards 2
2
+ [2024-03-18 21:03:53] INFO auto_config.py:115: Found model configuration: ../dist/models/ToolLLaMA-2-7b-v2/config.json
3
+ [2024-03-18 21:03:53] INFO auto_config.py:153: Found model type: llama. Use `--model-type` to override.
4
+ [2024-03-18 21:03:53] INFO llama_model.py:52: context_window_size not found in config.json. Falling back to max_position_embeddings (4096)
5
+ [2024-03-18 21:03:53] INFO llama_model.py:72: prefill_chunk_size defaults to context_window_size (4096)
6
+ [2024-03-18 21:03:53] INFO config.py:106: Overriding max_batch_size from 1 to 80
7
+ [2024-03-18 21:03:53] INFO config.py:106: Overriding tensor_parallel_shards from 1 to 2
8
+ [2024-03-18 21:03:53] INFO gen_config.py:133: [generation_config.json] Setting bos_token_id: 1
9
+ [2024-03-18 21:03:53] INFO gen_config.py:133: [generation_config.json] Setting eos_token_id: 2
10
+ [2024-03-18 21:03:53] INFO gen_config.py:145: Found tokenizer config: ../dist/models/ToolLLaMA-2-7b-v2/tokenizer.model. Copying to /tmp/tmpxjsa38do/tokenizer.model
11
+ [2024-03-18 21:03:53] INFO gen_config.py:147: Not found tokenizer config: ../dist/models/ToolLLaMA-2-7b-v2/tokenizer.json
12
+ [2024-03-18 21:03:53] INFO gen_config.py:147: Not found tokenizer config: ../dist/models/ToolLLaMA-2-7b-v2/vocab.json
13
+ [2024-03-18 21:03:53] INFO gen_config.py:147: Not found tokenizer config: ../dist/models/ToolLLaMA-2-7b-v2/merges.txt
14
+ [2024-03-18 21:03:53] INFO gen_config.py:147: Not found tokenizer config: ../dist/models/ToolLLaMA-2-7b-v2/added_tokens.json
15
+ [2024-03-18 21:03:53] INFO gen_config.py:145: Found tokenizer config: ../dist/models/ToolLLaMA-2-7b-v2/tokenizer_config.json. Copying to /tmp/tmpxjsa38do/tokenizer_config.json
16
+ [2024-03-18 21:03:53] INFO gen_config.py:153: The model has `tokenizer.model` but not `tokenizer.json`. It is always recommended to prefer JSON instead. Attempting to convert using HuggingFace transformers library
17
+ You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
18
+ [2024-03-18 21:03:54] INFO gen_config.py:167: Succesfully converted `tokenizer.model` to: /tmp/tmpxjsa38do/tokenizer.json
19
+ [2024-03-18 21:03:54] INFO gen_config.py:75: [System default] Setting pad_token_id: 0
20
+ [2024-03-18 21:03:54] INFO gen_config.py:75: [System default] Setting temperature: 0.7
21
+ [2024-03-18 21:03:54] INFO gen_config.py:75: [System default] Setting presence_penalty: 0.0
22
+ [2024-03-18 21:03:54] INFO gen_config.py:75: [System default] Setting frequency_penalty: 0.0
23
+ [2024-03-18 21:03:54] INFO gen_config.py:75: [System default] Setting repetition_penalty: 1.0
24
+ [2024-03-18 21:03:54] INFO gen_config.py:75: [System default] Setting top_p: 0.95
25
+ [2024-03-18 21:03:54] INFO gen_config.py:75: [System default] Setting mean_gen_len: 128
26
+ [2024-03-18 21:03:54] INFO gen_config.py:75: [System default] Setting max_gen_len: 512
27
+ [2024-03-18 21:03:54] INFO gen_config.py:75: [System default] Setting shift_fill_factor: 0.3
28
+ [2024-03-18 21:03:54] INFO gen_config.py:198: Dumping configuration file to: /tmp/tmpxjsa38do/mlc-chat-config.json
29
+ /home/floriadmin/miniforge3/envs/mlc/bin/python -m mlc_llm convert_weight ../dist/models/ToolLLaMA-2-7b-v2 --quantization q4f32_1 --source-format auto --output /tmp/tmpxjsa38do
30
+ [2024-03-18 21:03:55] INFO auto_config.py:115: Found model configuration: ../dist/models/ToolLLaMA-2-7b-v2/config.json
31
+ [2024-03-18 21:03:56] INFO auto_device.py:76: Found device: cuda:0
32
+ [2024-03-18 21:03:56] INFO auto_device.py:76: Found device: cuda:1
33
+ [2024-03-18 21:03:56] INFO auto_device.py:76: Found device: cuda:2
34
+ [2024-03-18 21:03:56] INFO auto_device.py:76: Found device: cuda:3
35
+ [2024-03-18 21:03:56] INFO auto_device.py:76: Found device: cuda:4
36
+ [2024-03-18 21:03:56] INFO auto_device.py:76: Found device: cuda:5
37
+ [2024-03-18 21:03:56] INFO auto_device.py:76: Found device: cuda:6
38
+ [2024-03-18 21:03:56] INFO auto_device.py:76: Found device: cuda:7
39
+ [2024-03-18 21:03:56] INFO auto_device.py:76: Found device: cuda:8
40
+ [2024-03-18 21:03:56] INFO auto_device.py:76: Found device: cuda:9
41
+ [2024-03-18 21:03:57] INFO auto_device.py:85: Not found device: rocm:0
42
+ [2024-03-18 21:03:58] INFO auto_device.py:85: Not found device: metal:0
43
+ [2024-03-18 21:04:02] INFO auto_device.py:76: Found device: vulkan:0
44
+ [2024-03-18 21:04:02] INFO auto_device.py:76: Found device: vulkan:1
45
+ [2024-03-18 21:04:02] INFO auto_device.py:76: Found device: vulkan:2
46
+ [2024-03-18 21:04:02] INFO auto_device.py:76: Found device: vulkan:3
47
+ [2024-03-18 21:04:02] INFO auto_device.py:76: Found device: vulkan:4
48
+ [2024-03-18 21:04:02] INFO auto_device.py:76: Found device: vulkan:5
49
+ [2024-03-18 21:04:02] INFO auto_device.py:76: Found device: vulkan:6
50
+ [2024-03-18 21:04:02] INFO auto_device.py:76: Found device: vulkan:7
51
+ [2024-03-18 21:04:02] INFO auto_device.py:76: Found device: vulkan:8
52
+ [2024-03-18 21:04:02] INFO auto_device.py:76: Found device: vulkan:9
53
+ [2024-03-18 21:04:02] INFO auto_device.py:76: Found device: vulkan:10
54
+ [2024-03-18 21:04:03] INFO auto_device.py:85: Not found device: opencl:0
55
+ [2024-03-18 21:04:03] INFO auto_device.py:33: Using device: cuda:0
56
+ [2024-03-18 21:04:03] INFO auto_weight.py:70: Finding weights in: ../dist/models/ToolLLaMA-2-7b-v2
57
+ [2024-03-18 21:04:03] INFO auto_weight.py:120: Found source weight format: huggingface-torch. Source configuration: ../dist/models/ToolLLaMA-2-7b-v2/pytorch_model.bin.index.json
58
+ [2024-03-18 21:04:03] INFO auto_weight.py:167: Not found Huggingface Safetensor
59
+ [2024-03-18 21:04:03] INFO auto_weight.py:106: Using source weight configuration: ../dist/models/ToolLLaMA-2-7b-v2/pytorch_model.bin.index.json. Use `--source` to override.
60
+ [2024-03-18 21:04:03] INFO auto_weight.py:110: Using source weight format: huggingface-torch. Use `--source-format` to override.
61
+ [2024-03-18 21:04:03] INFO auto_config.py:153: Found model type: llama. Use `--model-type` to override.
62
+ [2024-03-18 21:04:03] INFO llama_model.py:52: context_window_size not found in config.json. Falling back to max_position_embeddings (4096)
63
+ [2024-03-18 21:04:03] INFO llama_model.py:72: prefill_chunk_size defaults to context_window_size (4096)
64
+ Weight conversion with arguments:
65
+ --config ../dist/models/ToolLLaMA-2-7b-v2/config.json
66
+ --quantization GroupQuantize(name='q4f32_1', kind='group-quant', group_size=40, quantize_dtype='int4', storage_dtype='uint32', model_dtype='float32', linear_weight_layout='NK', quantize_embedding=True, quantize_final_fc=True, num_elem_per_storage=8, num_storage_per_group=5, max_int_value=7)
67
+ --model-type llama
68
+ --device cuda:0
69
+ --source ../dist/models/ToolLLaMA-2-7b-v2/pytorch_model.bin.index.json
70
+ --source-format huggingface-torch
71
+ --output /tmp/tmpxjsa38do
72
+ Start storing to cache /tmp/tmpxjsa38do
73
+
74
  0%| | 0/195 [00:00<?, ?it/s]
75
 
76
+
77
  0%| | 0/195 [00:00<?, ?it/s]
78
  0%| | 0/195 [00:00<?, ?it/s]
79
+ Traceback (most recent call last):
80
+ File "<frozen runpy>", line 198, in _run_module_as_main
81
+ File "<frozen runpy>", line 88, in _run_code
82
+ File "/home/floriadmin/mlc-llm/python/mlc_llm/__main__.py", line 47, in <module>
83
+ main()
84
+ File "/home/floriadmin/mlc-llm/python/mlc_llm/__main__.py", line 28, in main
85
+ cli.main(sys.argv[2:])
86
+ File "/home/floriadmin/mlc-llm/python/mlc_llm/cli/convert_weight.py", line 87, in main
87
+ convert_weight(
88
+ File "/home/floriadmin/mlc-llm/python/mlc_llm/interface/convert_weight.py", line 182, in convert_weight
89
+ _convert_args(args)
90
+ File "/home/floriadmin/mlc-llm/python/mlc_llm/interface/convert_weight.py", line 146, in _convert_args
91
+ tvmjs.dump_ndarray_cache(
92
+ File "/home/floriadmin/miniforge3/envs/mlc/lib/python3.11/site-packages/tvm/contrib/tvmjs.py", line 210, in dump_ndarray_cache
93
+ for k, origin_v in param_generator:
94
+ File "/home/floriadmin/mlc-llm/python/mlc_llm/interface/convert_weight.py", line 130, in _param_generator
95
+ for name, param in loader.load(device=args.device, preshard_funcs=preshard_funcs):
96
+ File "/home/floriadmin/mlc-llm/python/mlc_llm/loader/huggingface_loader.py", line 117, in load
97
+ param = self._load_mlc_param(mlc_name, device=device)
98
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
99
+ File "/home/floriadmin/mlc-llm/python/mlc_llm/loader/huggingface_loader.py", line 147, in _load_mlc_param
100
+ self._load_file(path)
101
+ File "/home/floriadmin/mlc-llm/python/mlc_llm/loader/huggingface_loader.py", line 186, in _load_file
102
+ for name, param in load_func(path):
103
+ File "/home/floriadmin/mlc-llm/python/mlc_llm/loader/utils.py", line 42, in load_torch_shard
104
+ for name, param in torch.load(path, map_location=torch.device("cpu")).items():
105
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
106
+ File "/home/floriadmin/miniforge3/envs/mlc/lib/python3.11/site-packages/torch/serialization.py", line 998, in load
107
+ with _open_file_like(f, 'rb') as opened_file:
108
+ ^^^^^^^^^^^^^^^^^^^^^^^^
109
+ File "/home/floriadmin/miniforge3/envs/mlc/lib/python3.11/site-packages/torch/serialization.py", line 445, in _open_file_like
110
+ return _open_file(name_or_buffer, mode)
111
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
112
+ File "/home/floriadmin/miniforge3/envs/mlc/lib/python3.11/site-packages/torch/serialization.py", line 426, in __init__
113
+ super().__init__(open(name, mode))
114
+ ^^^^^^^^^^^^^^^^
115
+ FileNotFoundError: [Errno 2] No such file or directory: '../dist/models/ToolLLaMA-2-7b-v2/pytorch_model-00003-of-00003.bin'
mlc-chat-config.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "llama",
3
+ "quantization": "q4f32_1",
4
+ "model_config": {
5
+ "hidden_size": 4096,
6
+ "intermediate_size": 11008,
7
+ "num_attention_heads": 32,
8
+ "num_hidden_layers": 32,
9
+ "rms_norm_eps": 1e-05,
10
+ "vocab_size": 32000,
11
+ "position_embedding_base": 10000.0,
12
+ "context_window_size": 4096,
13
+ "prefill_chunk_size": 4096,
14
+ "num_key_value_heads": 32,
15
+ "head_dim": 128,
16
+ "tensor_parallel_shards": 2,
17
+ "max_batch_size": 80
18
+ },
19
+ "vocab_size": 32000,
20
+ "context_window_size": 4096,
21
+ "sliding_window_size": -1,
22
+ "prefill_chunk_size": 4096,
23
+ "attention_sink_size": -1,
24
+ "tensor_parallel_shards": 2,
25
+ "mean_gen_len": 128,
26
+ "max_gen_len": 512,
27
+ "shift_fill_factor": 0.3,
28
+ "temperature": 0.7,
29
+ "presence_penalty": 0.0,
30
+ "frequency_penalty": 0.0,
31
+ "repetition_penalty": 1.0,
32
+ "top_p": 0.95,
33
+ "conv_template": {
34
+ "name": "llama-2",
35
+ "system_template": "[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n ",
36
+ "system_message": "You are a helpful, respectful and honest assistant.",
37
+ "roles": {
38
+ "user": "[INST]",
39
+ "assistant": "[/INST]",
40
+ "tool": "[INST]"
41
+ },
42
+ "role_templates": {
43
+ "user": "{user_message}",
44
+ "assistant": "{assistant_message}",
45
+ "tool": "{tool_message}"
46
+ },
47
+ "messages": [],
48
+ "seps": [
49
+ " "
50
+ ],
51
+ "role_content_sep": " ",
52
+ "role_empty_sep": " ",
53
+ "stop_str": [
54
+ "[INST]"
55
+ ],
56
+ "stop_token_ids": [
57
+ 2
58
+ ],
59
+ "function_string": "",
60
+ "use_function_calling": false
61
+ },
62
+ "pad_token_id": 0,
63
+ "bos_token_id": 1,
64
+ "eos_token_id": 2,
65
+ "tokenizer_files": [
66
+ "tokenizer.model",
67
+ "tokenizer_config.json",
68
+ "tokenizer.json"
69
+ ],
70
+ "version": "0.1.0"
71
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "<s>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "clean_up_tokenization_spaces": false,
13
+ "eos_token": {
14
+ "__type": "AddedToken",
15
+ "content": "</s>",
16
+ "lstrip": false,
17
+ "normalized": true,
18
+ "rstrip": false,
19
+ "single_word": false
20
+ },
21
+ "legacy": null,
22
+ "model_max_length": 8192,
23
+ "pad_token": null,
24
+ "padding_side": "right",
25
+ "sp_model_kwargs": {},
26
+ "spaces_between_special_tokens": false,
27
+ "tokenizer_class": "LlamaTokenizer",
28
+ "unk_token": {
29
+ "__type": "AddedToken",
30
+ "content": "<unk>",
31
+ "lstrip": false,
32
+ "normalized": true,
33
+ "rstrip": false,
34
+ "single_word": false
35
+ },
36
+ "use_default_system_prompt": true
37
+ }