robbiemu committed
Commit
5dadba4
1 Parent(s): 6a423d8

update for quantization


imatrix

record of change

q5_k_m

bf16

iq2_xs

iq3_m

iq3_s

iq3_xs

iq3_xxs

iq4_nl

iq4_xs

q3_k_l

q3_k_m

q3_k_s

q4_k_m

q4_k_s

q5_k_s

q6_k

q8_0

removing safetensors

Temporarily remove LFS tracking for salamandra_header.png

Remove problematic salamandra_header.png from the repository

Fully remove salamandra_header.png from cache and LFS tracking

Track large files with Git LFS

Remove salamandra_header.png from LFS tracking

Add salamandra_header.png to LFS

update git attributes

Ensure all LFS-tracked model files are added

removing safetensors

removing duplicate data

removing duplicate data

lfs the imatrix
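The quantization commits above follow the usual llama.cpp imatrix workflow: compute an importance matrix over the calibration text, then derive each quant from the bf16 base. A minimal sketch of that pipeline, with paths taken from the logs below; the binary names and flags assume a llama.cpp build of roughly this vintage and are not recorded in the commits themselves:

    # compute the importance matrix over the OSCAR calibration dataset
    llama-imatrix -m salamandra-2b_bf16.gguf \
        -f imatrix/oscar/imatrix-dataset.txt \
        -o imatrix/oscar/imatrix.dat

    # derive each quant from the bf16 base, guided by the imatrix
    for q in Q8_0 Q6_K Q5_K_M Q5_K_S Q4_K_M Q4_K_S IQ4_NL IQ4_XS \
             Q3_K_L Q3_K_M Q3_K_S IQ3_M IQ3_S IQ3_XS IQ3_XXS IQ2_XS; do
        llama-quantize --imatrix imatrix/oscar/imatrix.dat \
            salamandra-2b_bf16.gguf "salamandra-2b_${q}.gguf" "$q"
    done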

.gitattributes CHANGED
@@ -33,5 +33,28 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ salamandra-2b_IQ4_XS.gguf filter=lfs diff=lfs merge=lfs -text
+ salamandra-2b_Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+ salamandra-2b_Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
+ salamandra-2b_Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text
+ salamandra-2b_IQ3_S.gguf filter=lfs diff=lfs merge=lfs -text
+ salamandra-2b_Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text
+ salamandra-2b_Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text
+ salamandra-2b_Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+ salamandra-2b_Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+ salamandra-2b_Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
+ salamandra-2b_IQ3_M.gguf filter=lfs diff=lfs merge=lfs -text
+ salamandra-2b_IQ3_XS.gguf filter=lfs diff=lfs merge=lfs -text
+ salamandra-2b_Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
+ salamandra-2b_bf16.gguf filter=lfs diff=lfs merge=lfs -text
+ salamandra-2b_IQ2_XS.gguf filter=lfs diff=lfs merge=lfs -text
+ salamandra-2b_IQ3_XXS.gguf filter=lfs diff=lfs merge=lfs -text
+ salamandra-2b_IQ4_NL.gguf filter=lfs diff=lfs merge=lfs -text
+ imatrix/oscar/imatrix-dataset.txt filter=lfs diff=lfs merge=lfs -text
+ imatrix/oscar/langs/bg.txt filter=lfs diff=lfs merge=lfs -text
+ imatrix/oscar/langs/el.txt filter=lfs diff=lfs merge=lfs -text
+ imatrix/oscar/langs/ru.txt filter=lfs diff=lfs merge=lfs -text
+ imatrix/oscar/langs/uk.txt filter=lfs diff=lfs merge=lfs -text
+ imatrix/oscar/langs/*.txt filter=lfs diff=lfs merge=lfs -text
  images/salamandra_header.png filter=lfs diff=lfs merge=lfs -text
- tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ imatrix/oscar/imatrix.dat filter=lfs diff=lfs merge=lfs -text
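Each added line above is the kind of entry `git lfs track` appends to .gitattributes for one pattern; the commit messages "Track large files with Git LFS" and "lfs the imatrix" correspond to commands along these lines (a hedged sketch; the commit history shows most GGUF files were tracked individually, one `git lfs track <file>` each):

    git lfs track "salamandra-2b_IQ2_XS.gguf"   # repeated per quant file
    git lfs track "imatrix/oscar/langs/*.txt"
    git lfs track "imatrix/oscar/imatrix.dat"
    git add .gitattributes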
.gitignore ADDED
@@ -0,0 +1,191 @@
+ .aider*
+
+ # General
+ .DS_Store
+ .AppleDouble
+ .LSOverride
+
+ # Icon must end with two \r
+ Icon
+
+ # Thumbnails
+ ._*
+
+ # Files that might appear in the root of a volume
+ .DocumentRevisions-V100
+ .fseventsd
+ .Spotlight-V100
+ .TemporaryItems
+ .Trashes
+ .VolumeIcon.icns
+ .com.apple.timemachine.donotpresent
+
+ # Directories potentially created on remote AFP share
+ .AppleDB
+ .AppleDesktop
+ Network Trash Folder
+ Temporary Items
+ .apdisk
+
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+ .pdm.toml
+ .pdm-python
+ .pdm-build/
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
IQ2_XS_log.txt ADDED
@@ -0,0 +1,339 @@
+ main: build = 3906 (7eee341b)
+ main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
+ main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_IQ2_XS.gguf' as IQ2_XS
+ llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
+ llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
+ llama_model_loader: - kv 0: general.architecture str = llama
+ llama_model_loader: - kv 1: general.type str = model
+ llama_model_loader: - kv 2: general.size_label str = 2.3B
+ llama_model_loader: - kv 3: general.license str = apache-2.0
+ llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
+ llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
+ llama_model_loader: - kv 6: llama.block_count u32 = 24
+ llama_model_loader: - kv 7: llama.context_length u32 = 8192
+ llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
+ llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
+ llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
+ llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
+ llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
+ llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
+ llama_model_loader: - kv 14: general.file_type u32 = 32
+ llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
+ llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
+ llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
+ llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
+ llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
+ llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
+ llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
+ llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
+ llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
+ llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
+ llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
+ llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
+ llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
+ llama_model_loader: - kv 28: general.quantization_version u32 = 2
+ llama_model_loader: - type f32: 49 tensors
+ llama_model_loader: - type bf16: 170 tensors
+ ================================ Have weights data with 168 entries
+ [ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
+ [ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
+ ====== llama_model_quantize_internal: did not find weights for token_embd.weight
+ converting to q2_K .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
+ load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
+ prepare_imatrix: have 168 importance matrix entries
+ size = 1000.00 MiB -> 164.06 MiB
+ [ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q2_K - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+ [ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q2_K - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+ [ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q2_K - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+ [ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+ [ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+ [ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+ [ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+ [ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+ [ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+ [ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+ [ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+ [ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+ [ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+ [ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+ [ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+ [ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+ [ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+ [ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+ [ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+ [ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+ [ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+ [ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+ [ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+ [ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+ [ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+ [ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+ [ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ llama_model_quantize_internal: model size = 4298.38 MB
+ llama_model_quantize_internal: quant size = 1570.05 MB
+ llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization
+
+ main: quantize time = 33024.88 ms
+ main: total time = 33024.88 ms
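The 24 fallback tensors reported above are the per-layer ffn_down weights: their row length of 5440 is not a multiple of the 256-element superblock that q2_K and iq2_xs require (5440 / 256 = 21.25), so the quantizer substitutes iq4_nl, which works on 32-element blocks (5440 / 32 = 170). A sketch of the command behind this log, reconstructed from its own header lines; the binary name is an assumption for a llama.cpp build of this era (build 3906):

    llama-quantize --imatrix imatrix/oscar/imatrix.dat \
        salamandra-2b_bf16.gguf salamandra-2b_IQ2_XS.gguf IQ2_XS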
IQ3_M_log.txt ADDED
@@ -0,0 +1,339 @@
+ main: build = 3906 (7eee341b)
+ main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
+ main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_IQ3_M.gguf' as IQ3_M
+ llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
+ llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
+ llama_model_loader: - kv 0: general.architecture str = llama
+ llama_model_loader: - kv 1: general.type str = model
+ llama_model_loader: - kv 2: general.size_label str = 2.3B
+ llama_model_loader: - kv 3: general.license str = apache-2.0
+ llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
+ llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
+ llama_model_loader: - kv 6: llama.block_count u32 = 24
+ llama_model_loader: - kv 7: llama.context_length u32 = 8192
+ llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
+ llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
+ llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
+ llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
+ llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
+ llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
+ llama_model_loader: - kv 14: general.file_type u32 = 32
+ llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
+ llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
+ llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
+ llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
+ llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
+ llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
+ llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
+ llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
+ llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
+ llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
+ llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
+ llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
+ llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
+ llama_model_loader: - kv 28: general.quantization_version u32 = 2
+ llama_model_loader: - type f32: 49 tensors
+ llama_model_loader: - type bf16: 170 tensors
+ ================================ Have weights data with 168 entries
+ [ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
+ [ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
+ ====== llama_model_quantize_internal: did not find weights for token_embd.weight
+ converting to iq3_s .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
+ load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
+ prepare_imatrix: have 168 importance matrix entries
+ size = 1000.00 MiB -> 214.84 MiB
+ [ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
193
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
194
+ [ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
195
+ [ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
196
+ [ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
197
+ [ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
198
+ [ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
199
+ [ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
200
+ [ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
201
+ [ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
202
+ [ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
203
+
204
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
205
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
206
+ [ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
207
+ [ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
208
+ [ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
209
+ [ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
210
+ [ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
211
+ [ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
212
+ [ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
213
+ [ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
214
+ [ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
215
+
216
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
217
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
218
+ [ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
219
+ [ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
220
+ [ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
221
+ [ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
222
+ [ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
223
+ [ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
224
+ [ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
225
+ [ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
226
+ [ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
227
+
228
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
229
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
230
+ [ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
231
+ [ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
232
+ [ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
233
+ [ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
234
+ [ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
235
+ [ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
236
+ [ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
237
+ [ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
238
+ [ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
239
+
240
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
241
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
242
+ [ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
243
+ [ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
244
+ [ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
245
+ [ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
246
+ [ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
247
+ [ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
248
+ [ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
249
+ [ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
250
+ [ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
251
+
252
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
253
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
254
+ [ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
255
+ [ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
256
+ [ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
257
+ [ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
258
+ [ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
259
+ [ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
260
+ [ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
261
+ [ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
262
+ [ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
263
+
264
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
265
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
266
+ [ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
267
+ [ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
268
+ [ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
269
+ [ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
270
+ [ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
271
+ [ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
272
+ [ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
273
+ [ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
274
+ [ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
275
+
276
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
277
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
278
+ [ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
279
+ [ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
280
+ [ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
281
+ [ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
282
+ [ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
283
+ [ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
284
+ [ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
285
+ [ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
286
+ [ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
287
+
288
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
289
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
290
+ [ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
291
+ [ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
292
+ [ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
293
+ [ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
294
+ [ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
295
+ [ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
296
+ [ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
297
+ [ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
298
+ [ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
299
+
300
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
301
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
302
+ [ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
303
+ [ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
304
+ [ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
305
+ [ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
306
+ [ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
307
+ [ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
308
+ [ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
309
+ [ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
310
+ [ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
311
+
312
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
313
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
314
+ [ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
315
+ [ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
316
+ [ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
317
+ [ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
318
+ [ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
319
+ [ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
320
+ [ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
321
+ [ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
322
+ [ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
323
+
324
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
325
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
326
+ [ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
327
+ [ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
328
+ [ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
329
+ [ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
330
+ [ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
331
+ [ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
332
+ [ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
333
+ [ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
334
+ llama_model_quantize_internal: model size = 4298.38 MB
335
+ llama_model_quantize_internal: quant size = 1772.29 MB
336
+ llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization
337
+
338
+ main: quantize time = 20053.13 ms
339
+ main: total time = 20053.13 ms
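Note on the warning above: all 24 fallbacks are the per-layer ffn_down tensors. There is one per layer and llama.block_count = 24; their row length of 5440 is not a multiple of 256, the super-block size the iq3_s and q4_K codecs require, so each drops to iq4_nl, whose 32-value blocks do divide 5440. The 2048-wide attention tensors are multiples of 256, so they never fall back. A minimal Python sketch of that arithmetic, with bits-per-weight figures inferred from the sizes printed in this log (the block sizes are quoted from memory of ggml, not read from this repo):

# Hedged sketch: reproduce the size arithmetic visible in the log lines above.
ROWS, COLS = 5440, 2048          # blk.N.ffn_down.weight shape
BF16_BPW = 16.0                  # source tensors are bf16

def bpw(mib_before, mib_after):
    """Bits per weight implied by a 'size = A MiB -> B MiB' log line."""
    return BF16_BPW * mib_after / mib_before

assert ROWS % 256 != 0           # 5440 % 256 == 64 -> iq3_s/q4_K rejected
assert ROWS % 32 == 0            # iq4_nl's 32-value blocks fit -> fallback works
assert COLS % 256 == 0           # 2048-wide attn tensors never fall back

print(round(bpw(21.25, 4.57), 2))  # ~3.44 bpw: the iq3_s ffn_gate/ffn_up lines
print(round(bpw(21.25, 5.98), 2))  # ~4.5 bpw: the iq4_nl ffn_down fallback lines
print(round(bpw(8.00, 2.25), 2))   # ~4.5 bpw: the q4_K attn_output/attn_v lines
# One ffn_down per layer x 24 layers = the "24 of 169" in the warning.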
IQ3_S_log.txt ADDED
@@ -0,0 +1,339 @@
+ main: build = 3906 (7eee341b)
+ main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
+ main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_IQ3_S.gguf' as IQ3_S
+ llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
+ llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
+ llama_model_loader: - kv 0: general.architecture str = llama
+ llama_model_loader: - kv 1: general.type str = model
+ llama_model_loader: - kv 2: general.size_label str = 2.3B
+ llama_model_loader: - kv 3: general.license str = apache-2.0
+ llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
+ llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
+ llama_model_loader: - kv 6: llama.block_count u32 = 24
+ llama_model_loader: - kv 7: llama.context_length u32 = 8192
+ llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
+ llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
+ llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
+ llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
+ llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
+ llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
+ llama_model_loader: - kv 14: general.file_type u32 = 32
+ llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
+ llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
+ llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
+ llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
+ llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
+ llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
+ llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
+ llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
+ llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
+ llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
+ llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
+ llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
+ llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
+ llama_model_loader: - kv 28: general.quantization_version u32 = 2
+ llama_model_loader: - type f32: 49 tensors
+ llama_model_loader: - type bf16: 170 tensors
+ ================================ Have weights data with 168 entries
+ [ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
+ [ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
+ ====== llama_model_quantize_internal: did not find weights for token_embd.weight
+ converting to iq3_s .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
+ load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
+ prepare_imatrix: have 168 importance matrix entries
+ size = 1000.00 MiB -> 214.84 MiB
+ [ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ llama_model_quantize_internal: model size = 4298.38 MB
+ llama_model_quantize_internal: quant size = 1742.80 MB
+ llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization
+
+ main: quantize time = 21645.04 ms
+ main: total time = 21645.04 ms
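For reproducibility, this is roughly the invocation the headers above describe. A sketch only, under two assumptions: that llama.cpp build 3906 ships the quantizer as the llama-quantize binary, and that its --imatrix flag was used to pass the file named in the load_imatrix lines; the per-tensor mix (iq3_s vs. iq3_xxs vs. q4_K) is chosen internally by each preset, not by extra flags.

# Hedged sketch of the quantization runs behind these logs (assumptions above).
import subprocess

for qtype in ("IQ3_S", "IQ3_XS"):
    subprocess.run(
        [
            "./llama-quantize",                        # llama.cpp build 3906 (assumed name)
            "--imatrix", "imatrix/oscar/imatrix.dat",  # 168 entries / 44176 chunks
            "salamandra-2b_bf16.gguf",                 # 4298.38 MB bf16 source
            f"./salamandra-2b_{qtype}.gguf",
            qtype,
        ],
        check=True,
    )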
IQ3_XS_log.txt ADDED
@@ -0,0 +1,339 @@
1
+ main: build = 3906 (7eee341b)
2
+ main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
3
+ main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_IQ3_XS.gguf' as IQ3_XS
4
+ llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
5
+ llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
6
+ llama_model_loader: - kv 0: general.architecture str = llama
7
+ llama_model_loader: - kv 1: general.type str = model
8
+ llama_model_loader: - kv 2: general.size_label str = 2.3B
9
+ llama_model_loader: - kv 3: general.license str = apache-2.0
10
+ llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
11
+ llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
+ llama_model_loader: - kv 6: llama.block_count u32 = 24
+ llama_model_loader: - kv 7: llama.context_length u32 = 8192
+ llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
+ llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
+ llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
+ llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
+ llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
+ llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
+ llama_model_loader: - kv 14: general.file_type u32 = 32
+ llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
+ llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
+ llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
+ llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
+ llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
+ llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
+ llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
+ llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
+ llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
+ llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
+ llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
+ llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
+ llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
+ llama_model_loader: - kv 28: general.quantization_version u32 = 2
+ llama_model_loader: - type f32: 49 tensors
+ llama_model_loader: - type bf16: 170 tensors
+ ================================ Have weights data with 168 entries
+ [ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
+ [ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
+ ====== llama_model_quantize_internal: did not find weights for token_embd.weight
+ converting to iq3_s .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
+ load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
+ prepare_imatrix: have 168 importance matrix entries
+ size = 1000.00 MiB -> 214.84 MiB
+ [ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+ [ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ llama_model_quantize_internal: model size = 4298.38 MB
+ llama_model_quantize_internal: quant size = 1715.88 MB
+ llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization
+
+ main: quantize time = 25070.00 ms
+ main: total time = 25070.00 ms
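
Aside on the fallback warnings: the 24 tensors flagged in the summary above are exactly the 24 ffn_down projections, one per transformer block (llama.block_count = 24). The IQ2/IQ3 types pack weights in 256-element super-blocks, and an ffn_down row holds 5440 weights; since 5440 = 21 x 256 + 64, the rows cannot be tiled and llama.cpp substitutes iq4_nl, whose 32-element blocks divide 5440 evenly. A minimal Python sketch of that check follows; the pick_type helper and the QUANTS table are illustrative, not llama.cpp API, the block widths are the documented super-block sizes, and the bits-per-weight values are back-calculated from the tensor sizes printed in these logs (bf16 = 16 bpw) rather than taken from llama.cpp:

# Why every 5440-row ffn_down tensor falls back to iq4_nl in these logs.
# bpw values are derived from the "21.25 MiB -> x MiB" lines (bf16 = 16 bpw).
QUANTS = {
    "iq3_s":   {"block": 256, "bpw": 16 * 4.57 / 21.25},  # ~3.44 bpw
    "iq3_xxs": {"block": 256, "bpw": 16 * 4.07 / 21.25},  # ~3.06 bpw
    "iq4_nl":  {"block": 32,  "bpw": 16 * 5.98 / 21.25},  # ~4.50 bpw
}

def pick_type(requested: str, row_len: int) -> str:
    """Mirror the divisibility fallback reported by llama_tensor_get_type."""
    return requested if row_len % QUANTS[requested]["block"] == 0 else "iq4_nl"

# GGUF prints shapes as [row_len, rows, 1, 1].
for name, (row_len, rows) in [("blk.0.ffn_down.weight", (5440, 2048)),
                              ("blk.0.ffn_gate.weight", (2048, 5440))]:
    q = pick_type("iq3_s", row_len)
    mib = row_len * rows * QUANTS[q]["bpw"] / 8 / 2**20
    print(f"{name}: {q}, ~{mib:.2f} MiB")

Run as-is, this prints ~5.98 MiB for the fallback iq4_nl ffn_down and ~4.57 MiB for the iq3_s ffn_gate, matching the per-tensor lines in the log above.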
IQ3_XXS_log.txt ADDED
@@ -0,0 +1,339 @@
+ main: build = 3906 (7eee341b)
+ main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
+ main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_IQ3_XXS.gguf' as IQ3_XXS
+ llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
+ llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
+ llama_model_loader: - kv 0: general.architecture str = llama
+ llama_model_loader: - kv 1: general.type str = model
+ llama_model_loader: - kv 2: general.size_label str = 2.3B
+ llama_model_loader: - kv 3: general.license str = apache-2.0
+ llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
+ llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
+ llama_model_loader: - kv 6: llama.block_count u32 = 24
+ llama_model_loader: - kv 7: llama.context_length u32 = 8192
+ llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
+ llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
+ llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
+ llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
+ llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
+ llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
+ llama_model_loader: - kv 14: general.file_type u32 = 32
+ llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
+ llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
+ llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
+ llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
+ llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
+ llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
+ llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
+ llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
+ llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
+ llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
+ llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
+ llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
+ llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
+ llama_model_loader: - kv 28: general.quantization_version u32 = 2
+ llama_model_loader: - type f32: 49 tensors
+ llama_model_loader: - type bf16: 170 tensors
+ ================================ Have weights data with 168 entries
+ [ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
+ [ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
+ ====== llama_model_quantize_internal: did not find weights for token_embd.weight
+ converting to iq3_s .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
+ load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
+ prepare_imatrix: have 168 importance matrix entries
+ size = 1000.00 MiB -> 214.84 MiB
+ [ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
+ [ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
+ [ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
+ [ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+ [ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
320
+ [ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
321
+ [ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
322
+ [ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
323
+
324
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
325
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
326
+ [ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
327
+ [ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
328
+ [ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
329
+ [ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
330
+ [ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
331
+ [ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
332
+ [ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
333
+ [ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
334
+ llama_model_quantize_internal: model size = 4298.38 MB
335
+ llama_model_quantize_internal: quant size = 1693.40 MB
336
+ llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization
337
+
338
+ main: quantize time = 28893.81 ms
339
+ main: total time = 28893.81 ms
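
Note on the repeated fallback warnings in the IQ3_XXS log above: IQ3_XXS packs weights into 256-wide super-blocks, so any tensor whose row size is not a multiple of 256 cannot use it and is quantized as IQ4_NL (32-wide blocks) instead. A minimal Python sketch of the check, assuming llama.cpp's usual block-size constants QK_K and QK4_NL:

    QK_K, QK4_NL = 256, 32          # super-block / block sizes (assumed constants)
    ffn_down_cols = 5440            # from the log: blk.*.ffn_down is [5440, 2048]
    print(ffn_down_cols % QK_K)     # 64 -> not divisible by 256, must fall back
    print(ffn_down_cols % QK4_NL)   # 0  -> IQ4_NL works, hence the iq4_nl fallback

With block_count = 24, the 24 ffn_down tensors are exactly the "24 of 169 tensor(s)" the WARNING above reports as requiring fallback quantization.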
IQ4_NL_log.txt ADDED
@@ -0,0 +1,266 @@
1
+ main: build = 3906 (7eee341b)
2
+ main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
3
+ main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_IQ4_NL.gguf' as IQ4_NL
4
+ llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
5
+ llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
6
+ llama_model_loader: - kv 0: general.architecture str = llama
7
+ llama_model_loader: - kv 1: general.type str = model
8
+ llama_model_loader: - kv 2: general.size_label str = 2.3B
9
+ llama_model_loader: - kv 3: general.license str = apache-2.0
10
+ llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
11
+ llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
12
+ llama_model_loader: - kv 6: llama.block_count u32 = 24
13
+ llama_model_loader: - kv 7: llama.context_length u32 = 8192
14
+ llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
15
+ llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
16
+ llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
17
+ llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
18
+ llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
19
+ llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
20
+ llama_model_loader: - kv 14: general.file_type u32 = 32
21
+ llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
22
+ llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
23
+ llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
24
+ llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
25
+ llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
26
+ llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
27
+ llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
28
+ llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
29
+ llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
30
+ llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
31
+ llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
32
+ llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
33
+ llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
34
+ llama_model_loader: - kv 28: general.quantization_version u32 = 2
35
+ llama_model_loader: - type f32: 49 tensors
36
+ llama_model_loader: - type bf16: 170 tensors
37
+ ================================ Have weights data with 168 entries
38
+ [ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
39
+ [ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
40
+ ====== llama_model_quantize_internal: did not find weights for token_embd.weight
41
+ converting to iq4_nl .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
42
+ load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
43
+ prepare_imatrix: have 168 importance matrix entries
44
+ size = 1000.00 MiB -> 281.25 MiB
45
+ [ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
46
+ [ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
47
+ [ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
48
+ [ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
49
+ [ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
50
+ [ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
51
+ [ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
52
+ [ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
53
+ [ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
54
+ [ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
55
+ [ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
56
+ [ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
57
+ [ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
58
+ [ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
59
+ [ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
60
+ [ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
61
+ [ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
62
+ [ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
63
+ [ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
64
+ [ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
65
+ [ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
66
+ [ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
67
+ [ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
68
+ [ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
69
+ [ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
70
+ [ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
71
+ [ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
72
+ [ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
73
+ [ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
74
+ [ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
75
+ [ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
76
+ [ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
77
+ [ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
78
+ [ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
79
+ [ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
80
+ [ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
81
+ [ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
82
+ [ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
83
+ [ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
84
+ [ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
85
+ [ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
86
+ [ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
87
+ [ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
88
+ [ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
89
+ [ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
90
+ [ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
91
+ [ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
92
+ [ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
93
+ [ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
94
+ [ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
95
+ [ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
96
+ [ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
97
+ [ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
98
+ [ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
99
+ [ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
100
+ [ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
101
+ [ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
102
+ [ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
103
+ [ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
104
+ [ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
105
+ [ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
106
+ [ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
107
+ [ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
108
+ [ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
109
+ [ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
110
+ [ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
111
+ [ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
112
+ [ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
113
+ [ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
114
+ [ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
115
+ [ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
116
+ [ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
117
+ [ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
118
+ [ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
119
+ [ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
120
+ [ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
121
+ [ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
122
+ [ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
123
+ [ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
124
+ [ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
125
+ [ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
126
+ [ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
127
+ [ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
128
+ [ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
129
+ [ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
130
+ [ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
131
+ [ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
132
+ [ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
133
+ [ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
134
+ [ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
135
+ [ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
136
+ [ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
137
+ [ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
138
+ [ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
139
+ [ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
140
+ [ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
141
+ [ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
142
+ [ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
143
+ [ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
144
+ [ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
145
+ [ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
146
+ [ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
147
+ [ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
148
+ [ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
149
+ [ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
150
+ [ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
151
+ [ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
152
+ [ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
153
+ [ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
154
+ [ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
155
+ [ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
156
+ [ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
157
+ [ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
158
+ [ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
159
+ [ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
160
+ [ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
161
+ [ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
162
+ [ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
163
+ [ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
164
+ [ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
165
+ [ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
166
+ [ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
167
+ [ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
168
+ [ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
169
+ [ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
170
+ [ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
171
+ [ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
172
+ [ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
173
+ [ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
174
+ [ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
175
+ [ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
176
+ [ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
177
+ [ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
178
+ [ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
179
+ [ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
180
+ [ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
181
+ [ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
182
+ [ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
183
+ [ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
184
+ [ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
185
+ [ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
186
+ [ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
187
+ [ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
188
+ [ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
189
+ [ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
190
+ [ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
191
+ [ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
192
+ [ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
193
+ [ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
194
+ [ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
195
+ [ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
196
+ [ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
197
+ [ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
198
+ [ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
199
+ [ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
200
+ [ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
201
+ [ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
202
+ [ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
203
+ [ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
204
+ [ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
205
+ [ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
206
+ [ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
207
+ [ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
208
+ [ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
209
+ [ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
210
+ [ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
211
+ [ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
212
+ [ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
213
+ [ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
214
+ [ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
215
+ [ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
216
+ [ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
217
+ [ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
218
+ [ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
219
+ [ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
220
+ [ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
221
+ [ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
222
+ [ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
223
+ [ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
224
+ [ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
225
+ [ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
226
+ [ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
227
+ [ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
228
+ [ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
229
+ [ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
230
+ [ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
231
+ [ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
232
+ [ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
233
+ [ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
234
+ [ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
235
+ [ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
236
+ [ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
237
+ [ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
238
+ [ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
239
+ [ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
240
+ [ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
241
+ [ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
242
+ [ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
243
+ [ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
244
+ [ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
245
+ [ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
246
+ [ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
247
+ [ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
248
+ [ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
249
+ [ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
250
+ [ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
251
+ [ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
252
+ [ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
253
+ [ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
254
+ [ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
255
+ [ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
256
+ [ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
257
+ [ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
258
+ [ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
259
+ [ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
260
+ [ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
261
+ [ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
262
+ llama_model_quantize_internal: model size = 4298.38 MB
263
+ llama_model_quantize_internal: quant size = 1927.95 MB
264
+
265
+ main: quantize time = 18024.91 ms
266
+ main: total time = 18024.91 ms
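
For the IQ4_NL run above, no fallbacks occur because IQ4_NL itself only needs 32-wide blocks, which both 5440 and 2048 satisfy; the larger quant size (1927.95 MB vs. 1693.40 MB for IQ3_XXS) reflects its ~4.5 bits per weight. A quick arithmetic sketch against the embedding tensor reported in the log:

    weights = 2048 * 256000             # token_embd.weight elements
    print(weights * 16  / 8 / 2**20)    # 1000.00 MiB at bf16, as logged
    print(weights * 4.5 / 8 / 2**20)    # 281.25 MiB at ~4.5 bpw, matching the log

These logs appear to come from llama.cpp's quantize tool at build 3906; the invocation was presumably along the lines of ./llama-quantize --imatrix imatrix/oscar/imatrix.dat salamandra-2b_bf16.gguf salamandra-2b_IQ4_NL.gguf IQ4_NL (command reconstructed from the log, not recorded in it).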
IQ4_XS_log.txt ADDED
@@ -0,0 +1,339 @@
1
+ main: build = 3906 (7eee341b)
2
+ main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
3
+ main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_IQ4_XS.gguf' as IQ4_XS
4
+ llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
5
+ llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
6
+ llama_model_loader: - kv 0: general.architecture str = llama
7
+ llama_model_loader: - kv 1: general.type str = model
8
+ llama_model_loader: - kv 2: general.size_label str = 2.3B
9
+ llama_model_loader: - kv 3: general.license str = apache-2.0
10
+ llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
11
+ llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
12
+ llama_model_loader: - kv 6: llama.block_count u32 = 24
13
+ llama_model_loader: - kv 7: llama.context_length u32 = 8192
14
+ llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
15
+ llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
16
+ llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
17
+ llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
18
+ llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
19
+ llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
20
+ llama_model_loader: - kv 14: general.file_type u32 = 32
21
+ llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
22
+ llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
23
+ llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
24
+ llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
25
+ llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
26
+ llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
27
+ llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
28
+ llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
29
+ llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
30
+ llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
31
+ llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
32
+ llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
33
+ llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
34
+ llama_model_loader: - kv 28: general.quantization_version u32 = 2
35
+ llama_model_loader: - type f32: 49 tensors
36
+ llama_model_loader: - type bf16: 170 tensors
37
+ ================================ Have weights data with 168 entries
38
+ [ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
39
+ [ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
40
+ ====== llama_model_quantize_internal: did not find weights for token_embd.weight
41
+ converting to iq4_xs .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
42
+ load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
43
+ prepare_imatrix: have 168 importance matrix entries
44
+ size = 1000.00 MiB -> 265.62 MiB
45
+ [ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
46
+ [ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
47
+
48
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
49
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
50
+ [ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
51
+ [ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
52
+ [ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
53
+ [ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
54
+ [ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
55
+ [ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
56
+ [ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
57
+ [ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
58
+ [ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
59
+
60
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
61
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
62
+ [ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
63
+ [ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
64
+ [ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
65
+ [ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
66
+ [ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
67
+ [ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
68
+ [ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
69
+ [ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
70
+ [ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
71
+
72
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
73
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
74
+ [ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
75
+ [ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
76
+ [ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
77
+ [ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
78
+ [ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
79
+ [ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
80
+ [ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
81
+ [ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
82
+ [ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
83
+
84
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
85
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
86
+ [ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
87
+ [ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
88
+ [ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
89
+ [ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
90
+ [ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
91
+ [ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
92
+ [ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
93
+ [ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
94
+ [ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
95
+
96
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
97
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
98
+ [ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
99
+ [ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
100
+ [ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
101
+ [ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
102
+ [ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
103
+ [ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
104
+ [ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
105
+ [ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
106
+ [ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
107
+
108
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
109
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
110
+ [ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
111
+ [ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
112
+ [ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
113
+ [ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
114
+ [ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
115
+ [ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
116
+ [ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
117
+ [ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
118
+ [ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
119
+
120
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
121
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
122
+ [ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
123
+ [ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
124
+ [ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
125
+ [ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
126
+ [ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
127
+ [ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
128
+ [ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
129
+ [ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
130
+ [ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
131
+
132
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
133
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
134
+ [ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
135
+ [ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
136
+ [ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
137
+ [ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
138
+ [ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
139
+ [ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
140
+ [ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
141
+ [ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
142
+ [ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
143
+
144
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
145
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
146
+ [ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
147
+ [ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
148
+ [ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
149
+ [ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
150
+ [ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
151
+ [ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
152
+ [ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
153
+ [ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
154
+ [ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
155
+
156
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
157
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
158
+ [ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
159
+ [ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
160
+ [ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
161
+ [ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
162
+ [ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
163
+ [ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
164
+ [ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
165
+ [ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
166
+ [ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
167
+
168
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
169
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
170
+ [ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
171
+ [ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
172
+ [ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
173
+ [ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
174
+ [ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
175
+ [ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
176
+ [ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
177
+ [ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
178
+ [ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
179
+
180
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
181
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
182
+ [ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
183
+ [ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
+ [ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
+ [ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
+ [ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
+ [ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
+ [ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
+ [ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
+ [ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
+ [ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
+ [ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
+ [ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
+ [ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
+ [ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
+ [ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
+ [ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
+ [ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
+ [ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
+ [ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
+ [ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
+ [ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
+ [ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
+ [ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
+ [ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
+ [ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
+ [ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
+ [ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
+ [ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ llama_model_quantize_internal: model size = 4298.38 MB
+ llama_model_quantize_internal: quant size = 1884.38 MB
+ llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization
+
+ main: quantize time = 18604.79 ms
+ main: total time = 18604.79 ms
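Note on the repeated fallback messages above: IQ4_XS, like the other k-/i-quants, packs weights in 256-element super-blocks, so a tensor's row width must be a multiple of 256. The ffn_down rows are 5440 wide (5440 = 21 × 256 + 64), so llama.cpp falls back to IQ4_NL, whose 32-element blocks do divide 5440 (170 × 32). A minimal sketch of that divisibility check, with hypothetical names rather than llama.cpp's actual internals:

// sketch.cpp - why 5440-wide rows trigger the iq4_nl fallback (illustrative only)
#include <cstdio>

int main() {
    const int cols        = 5440;  // ffn_down row width in salamandra-2b
    const int super_block = 256;   // block size required by iq4_xs / k-quants
    const int small_block = 32;    // block size used by iq4_nl / q5_0 / q5_1
    if (cols % super_block != 0)   // 5440 % 256 == 64 -> iq4_xs cannot be used
        printf("cols %d not divisible by %d -> fallback\n", cols, super_block);
    if (cols % small_block == 0)   // 5440 % 32 == 0 -> iq4_nl fits exactly
        printf("%d blocks of %d per row\n", cols / small_block, small_block);
    return 0;
}

This also explains the summary line: the model has 24 blocks, and exactly the 24 ffn_down tensors (one per block) required fallback quantization.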
Q3_K_L_log.txt ADDED
@@ -0,0 +1,339 @@
+ main: build = 3906 (7eee341b)
+ main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
+ main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_Q3_K_L.gguf' as Q3_K_L
+ llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
+ llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
+ llama_model_loader: - kv 0: general.architecture str = llama
+ llama_model_loader: - kv 1: general.type str = model
+ llama_model_loader: - kv 2: general.size_label str = 2.3B
+ llama_model_loader: - kv 3: general.license str = apache-2.0
+ llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
+ llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
+ llama_model_loader: - kv 6: llama.block_count u32 = 24
+ llama_model_loader: - kv 7: llama.context_length u32 = 8192
+ llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
+ llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
+ llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
+ llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
+ llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
+ llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
+ llama_model_loader: - kv 14: general.file_type u32 = 32
+ llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
+ llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
+ llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
+ llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
+ llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
+ llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
+ llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
+ llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
+ llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
+ llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
+ llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
+ llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
+ llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
+ llama_model_loader: - kv 28: general.quantization_version u32 = 2
+ llama_model_loader: - type f32: 49 tensors
+ llama_model_loader: - type bf16: 170 tensors
+ ================================ Have weights data with 168 entries
+ [ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
+ [ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
+ ====== llama_model_quantize_internal: did not find weights for token_embd.weight
+ converting to q3_K .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
+ load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
+ prepare_imatrix: have 168 importance matrix entries
+ size = 1000.00 MiB -> 214.84 MiB
+ [ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ llama_model_quantize_internal: model size = 4298.38 MB
+ llama_model_quantize_internal: quant size = 1840.12 MB
+ llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization
+
+ main: quantize time = 6546.55 ms
+ main: total time = 6546.55 ms
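For reference, the per-tensor size lines translate directly into bits per weight: a 2048 × 5440 bf16 tensor holds 11,141,120 weights in 21.25 MiB, so q3_K's 4.57 MiB is about 3.44 bpw and the q5_1 fallback's 7.97 MiB is 6 bpw. A quick way to recover these figures from the logs (a sketch, not part of the quantization tooling):

// bpw.cpp - recover bits-per-weight from the size lines above (illustrative only)
#include <cstdio>

int main() {
    const double n_weights = 2048.0 * 5440.0;          // one ffn tensor
    const char  *types[]   = {"q3_K", "q5_1", "iq4_xs", "iq4_nl"};
    const double mib[]     = {4.57, 7.97, 5.64, 5.98}; // sizes reported in the logs
    for (int i = 0; i < 4; ++i) {
        double bpw = mib[i] * 1024 * 1024 * 8 / n_weights;
        printf("%-7s ~ %.2f bits/weight\n", types[i], bpw);  // 3.44, 6.00, 4.25, 4.50
    }
    return 0;
}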
Q3_K_M_log.txt ADDED
@@ -0,0 +1,339 @@
+ main: build = 3906 (7eee341b)
+ main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
+ main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_Q3_K_M.gguf' as Q3_K_M
+ llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
+ llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
+ llama_model_loader: - kv 0: general.architecture str = llama
+ llama_model_loader: - kv 1: general.type str = model
+ llama_model_loader: - kv 2: general.size_label str = 2.3B
+ llama_model_loader: - kv 3: general.license str = apache-2.0
+ llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
+ llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
+ llama_model_loader: - kv 6: llama.block_count u32 = 24
+ llama_model_loader: - kv 7: llama.context_length u32 = 8192
+ llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
+ llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
+ llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
+ llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
+ llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
+ llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
+ llama_model_loader: - kv 14: general.file_type u32 = 32
+ llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
+ llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
+ llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
+ llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
+ llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
+ llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
+ llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
+ llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
+ llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
+ llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
+ llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
+ llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
+ llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
+ llama_model_loader: - kv 28: general.quantization_version u32 = 2
+ llama_model_loader: - type f32: 49 tensors
+ llama_model_loader: - type bf16: 170 tensors
+ ================================ Have weights data with 168 entries
+ [ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
+ [ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
+ ====== llama_model_quantize_internal: did not find weights for token_embd.weight
+ converting to q3_K .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
+ load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
+ prepare_imatrix: have 168 importance matrix entries
+ size = 1000.00 MiB -> 214.84 MiB
+ [ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
157
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
158
+ [ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
159
+ [ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
160
+ [ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
161
+ [ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
162
+ [ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
163
+ [ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
164
+ [ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
165
+ [ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
166
+ [ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
167
+
168
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
169
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
170
+ [ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
171
+ [ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
172
+ [ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
173
+ [ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
174
+ [ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
175
+ [ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
176
+ [ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
177
+ [ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
178
+ [ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
179
+
180
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
181
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
182
+ [ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
183
+ [ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
184
+ [ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
185
+ [ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
186
+ [ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
187
+ [ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
188
+ [ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
189
+ [ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
190
+ [ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
191
+
192
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
193
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
194
+ [ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
195
+ [ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
196
+ [ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
197
+ [ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
198
+ [ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
199
+ [ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
200
+ [ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
201
+ [ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
202
+ [ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
203
+
204
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
205
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
206
+ [ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
207
+ [ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
208
+ [ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
209
+ [ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
210
+ [ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
211
+ [ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
212
+ [ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
213
+ [ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
214
+ [ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
215
+
216
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
217
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
218
+ [ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
219
+ [ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
220
+ [ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
221
+ [ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
222
+ [ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
223
+ [ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
224
+ [ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
225
+ [ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
226
+ [ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
227
+
228
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
229
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
230
+ [ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
231
+ [ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
232
+ [ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
233
+ [ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
234
+ [ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
235
+ [ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
236
+ [ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
237
+ [ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
238
+ [ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
239
+
240
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
241
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
242
+ [ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
243
+ [ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
244
+ [ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
245
+ [ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
246
+ [ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
247
+ [ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
248
+ [ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
249
+ [ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
250
+ [ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
251
+
252
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
253
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
254
+ [ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
255
+ [ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
256
+ [ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
257
+ [ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
258
+ [ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
259
+ [ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
260
+ [ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
261
+ [ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
262
+ [ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
263
+
264
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
265
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
266
+ [ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
267
+ [ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
268
+ [ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
269
+ [ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
270
+ [ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
271
+ [ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
272
+ [ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
273
+ [ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
274
+ [ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
275
+
276
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
277
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
278
+ [ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
279
+ [ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
280
+ [ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
281
+ [ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
282
+ [ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
283
+ [ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
284
+ [ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
285
+ [ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
286
+ [ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
287
+
288
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
289
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
290
+ [ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
291
+ [ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
292
+ [ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
293
+ [ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
294
+ [ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
295
+ [ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
296
+ [ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
297
+ [ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
298
+ [ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
299
+
300
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
301
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
302
+ [ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
303
+ [ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
304
+ [ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
305
+ [ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
306
+ [ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
307
+ [ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
308
+ [ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
309
+ [ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
310
+ [ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
311
+
312
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
313
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
314
+ [ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
315
+ [ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
316
+ [ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
317
+ [ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
318
+ [ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
319
+ [ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
320
+ [ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
321
+ [ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
322
+ [ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
323
+
324
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
325
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
326
+ [ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
327
+ [ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
328
+ [ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
329
+ [ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
330
+ [ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
331
+ [ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
332
+ [ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
333
+ [ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
334
+ llama_model_quantize_internal: model size = 4298.38 MB
335
+ llama_model_quantize_internal: quant size = 1801.84 MB
336
+ llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization
337
+
338
+ main: quantize time = 5383.58 ms
339
+ main: total time = 5383.58 ms
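A note on the repeated fallback warning in this log: k-quant formats in llama.cpp pack each tensor row into super-blocks of 256 weights, so a row length that is not a multiple of 256 cannot use them. This model's FFN dimension is 5440 (llama.feed_forward_length in the metadata dump), and 5440 % 256 = 64, so every ffn_down tensor falls back, one per each of the 24 blocks (llama.block_count = 24), which is exactly the "24 of 169 tensor(s)" in the summary. A minimal sketch of the divisibility rule in Python (the helper is illustrative, not llama.cpp code; only the 256 super-block size comes from llama.cpp):

    # Hypothetical helper showing why ffn_down tensors trip the fallback.
    QK_K = 256  # k-quant super-block size used by llama.cpp

    def needs_fallback(row_len: int) -> bool:
        # A row can be k-quantized only if it splits into whole super-blocks.
        return row_len % QK_K != 0

    print(needs_fallback(5440))  # True  -> ffn_down rows fall back (here to q5_0)
    print(needs_fallback(2048))  # False -> 2048-wide attention tensors quantize as planned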
Q3_K_S_log.txt ADDED
@@ -0,0 +1,339 @@
+ main: build = 3906 (7eee341b)
+ main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
+ main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_Q3_K_S.gguf' as Q3_K_S
+ llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
+ llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
+ llama_model_loader: - kv 0: general.architecture str = llama
+ llama_model_loader: - kv 1: general.type str = model
+ llama_model_loader: - kv 2: general.size_label str = 2.3B
+ llama_model_loader: - kv 3: general.license str = apache-2.0
+ llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
+ llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
+ llama_model_loader: - kv 6: llama.block_count u32 = 24
+ llama_model_loader: - kv 7: llama.context_length u32 = 8192
+ llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
+ llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
+ llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
+ llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
+ llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
+ llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
+ llama_model_loader: - kv 14: general.file_type u32 = 32
+ llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
+ llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
+ llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
+ llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
+ llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
+ llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
+ llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
+ llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
+ llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
+ llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
+ llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
+ llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
+ llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
+ llama_model_loader: - kv 28: general.quantization_version u32 = 2
+ llama_model_loader: - type f32: 49 tensors
+ llama_model_loader: - type bf16: 170 tensors
+ ================================ Have weights data with 168 entries
+ [ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
+ [ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
+ ====== llama_model_quantize_internal: did not find weights for token_embd.weight
+ converting to q3_K .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
+ load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
+ prepare_imatrix: have 168 importance matrix entries
+ size = 1000.00 MiB -> 214.84 MiB
+ [ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
+ converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+ [ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
+ [ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
+ [ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ llama_model_quantize_internal: model size = 4298.38 MB
+ llama_model_quantize_internal: quant size = 1742.80 MB
+ llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization
+
+ main: quantize time = 6724.72 ms
+ main: total time = 6724.72 ms
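Comparing the two runs so far: the same 24 ffn_down tensors trip the fallback in every preset, but the substitute type tracks the type the preset wanted for that tensor. Q3_K_M targeted q4_K for ffn_down and fell back to q5_0 (7.30 MiB per tensor, 1801.84 MB total), while Q3_K_S targeted q3_K and fell back to iq4_nl (5.98 MiB per tensor, 1742.80 MB total); the Q4_K_M log below targets q6_K and falls back to q8_0. A small summary of what these logs show (the mapping is read off the logs, not llama.cpp's full fallback table):

    # Fallbacks observed in these three logs for the 5440-column ffn_down tensors.
    observed_ffn_down_fallback = {
        "q3_K": "iq4_nl",  # Q3_K_S run above
        "q4_K": "q5_0",    # Q3_K_M run above
        "q6_K": "q8_0",    # Q4_K_M run below
    }
    for target, fallback in observed_ffn_down_fallback.items():
        print(f"{target} -> {fallback}")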
Q4_K_M_log.txt ADDED
@@ -0,0 +1,339 @@
+ main: build = 3906 (7eee341b)
+ main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
+ main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_Q4_K_M.gguf' as Q4_K_M
+ llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
+ llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
+ llama_model_loader: - kv 0: general.architecture str = llama
+ llama_model_loader: - kv 1: general.type str = model
+ llama_model_loader: - kv 2: general.size_label str = 2.3B
+ llama_model_loader: - kv 3: general.license str = apache-2.0
+ llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
+ llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
+ llama_model_loader: - kv 6: llama.block_count u32 = 24
+ llama_model_loader: - kv 7: llama.context_length u32 = 8192
+ llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
+ llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
+ llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
+ llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
+ llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
+ llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
+ llama_model_loader: - kv 14: general.file_type u32 = 32
+ llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
+ llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
+ llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
+ llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
+ llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
+ llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
+ llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
+ llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
+ llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
+ llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
+ llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
+ llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
+ llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
+ llama_model_loader: - kv 28: general.quantization_version u32 = 2
+ llama_model_loader: - type f32: 49 tensors
+ llama_model_loader: - type bf16: 170 tensors
+ ================================ Have weights data with 168 entries
+ [ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
+ [ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
+ ====== llama_model_quantize_internal: did not find weights for token_embd.weight
+ converting to q4_K .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
+ load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
+ prepare_imatrix: have 168 importance matrix entries
+ size = 1000.00 MiB -> 281.25 MiB
+ [ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
+ [ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
+ [ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
+ [ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
+ [ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
+ [ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
+ [ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
+ [ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
+ [ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
+ [ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
+ [ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
+ [ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
+ [ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
+ [ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
+ [ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
+ [ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
+ [ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
+ [ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
+ [ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
+ [ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
+ [ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
+ [ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
+ [ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
+ [ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
+ [ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ llama_model_quantize_internal: model size = 4298.38 MB
+ llama_model_quantize_internal: quant size = 2020.01 MB
+ llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization
+
+ main: quantize time = 8902.98 ms
+ main: total time = 8902.98 ms
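As a quick cross-check of the Q4_K_M totals above, the per-tensor log lines imply the expected bits per weight for each quantization type, and the overall ratio matches the reported drop from 4298.38 MB to 2020.01 MB. A small illustrative Python snippet, assuming only the values copied from this log:

# Bits per weight implied by the per-tensor sizes in the Q4_K_M log (bf16 = 16 bits)
for name, orig_mib, quant_mib in [
    ("q4_K", 8.00, 2.25),    # -> 4.5 bits/weight
    ("q6_K", 8.00, 3.28),    # -> ~6.56 bits/weight
    ("q8_0", 21.25, 11.29),  # -> 8.5 bits/weight (the ffn_down fallback)
]:
    print(name, round(quant_mib / orig_mib * 16, 2))

print(f"{2020.01 / 4298.38:.1%} of the bf16 file size")  # -> ~47.0%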
Q4_K_S_log.txt ADDED
@@ -0,0 +1,339 @@
+ main: build = 3906 (7eee341b)
+ main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
+ main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_Q4_K_S.gguf' as Q4_K_S
+ llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
+ llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
+ llama_model_loader: - kv 0: general.architecture str = llama
+ llama_model_loader: - kv 1: general.type str = model
+ llama_model_loader: - kv 2: general.size_label str = 2.3B
+ llama_model_loader: - kv 3: general.license str = apache-2.0
+ llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
+ llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
+ llama_model_loader: - kv 6: llama.block_count u32 = 24
+ llama_model_loader: - kv 7: llama.context_length u32 = 8192
+ llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
+ llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
+ llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
+ llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
+ llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
+ llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
+ llama_model_loader: - kv 14: general.file_type u32 = 32
+ llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
+ llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
+ llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
+ llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
+ llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
+ llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
+ llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
+ llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
+ llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
+ llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
+ llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
+ llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
+ llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
+ llama_model_loader: - kv 28: general.quantization_version u32 = 2
+ llama_model_loader: - type f32: 49 tensors
+ llama_model_loader: - type bf16: 170 tensors
+ ================================ Have weights data with 168 entries
+ [ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
+ [ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
+ ====== llama_model_quantize_internal: did not find weights for token_embd.weight
+ converting to q4_K .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
+ load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
+ prepare_imatrix: have 168 importance matrix entries
+ size = 1000.00 MiB -> 281.25 MiB
+ [ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
+ [ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
+ [ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+ [ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
+ [ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+ [ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+ [ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
295
+ [ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
296
+ [ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
297
+ [ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
298
+ [ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
299
+
300
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
301
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
302
+ [ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
303
+ [ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
304
+ [ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
305
+ [ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
306
+ [ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
307
+ [ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
308
+ [ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
309
+ [ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
310
+ [ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
311
+
312
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
313
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
314
+ [ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
315
+ [ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
316
+ [ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
317
+ [ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
318
+ [ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
319
+ [ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
320
+ [ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
321
+ [ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
322
+ [ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
323
+
324
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
325
+ converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
326
+ [ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
327
+ [ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
328
+ [ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
329
+ [ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
330
+ [ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
331
+ [ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
332
+ [ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
333
+ [ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
334
+ llama_model_quantize_internal: model size = 4298.38 MB
335
+ llama_model_quantize_internal: quant size = 1963.81 MB
336
+ llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization
337
+
338
+ main: quantize time = 9350.38 ms
339
+ main: total time = 9350.38 ms
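
Note on the recurring fallback message in this log: k-quants such as q4_K pack weights into 256-element super-blocks, so a tensor's row length (its first dimension) must be divisible by 256. The ffn_down rows are 5440 wide, and 5440 % 256 = 64, which is why every ffn_down tensor falls back to a 32-element block format (q5_0 here). Below is a minimal sketch that reproduces the per-tensor sizes reported above; the block layouts it assumes (q4_K: 144 bytes per 256 weights; q5_0: 22 bytes per 32 weights) come from ggml's standard formats, not from anything in this repository.

# Sketch: reproduce the tensor sizes in the quantization log above.
# Assumed ggml block layouts (not taken from this repo):
#   q4_K : 144-byte super-block of 256 weights (4.5 bits/weight)
#   q5_0 :  22-byte block of 32 weights        (5.5 bits/weight)
MIB = 1024 * 1024

def quant_size_mib(n_weights: int, block_weights: int, block_bytes: int) -> float:
    """Size in MiB after quantizing n_weights with the given block format."""
    assert n_weights % block_weights == 0
    return n_weights // block_weights * block_bytes / MIB

ffn_down = 5440 * 2048  # row length 5440 -> 5440 % 256 == 64, q4_K impossible
attn     = 2048 * 2048  # row length 2048 -> divisible by 256, q4_K is fine

print(5440 % 256)                                       # 64 -> triggers the fallback
print(f"{quant_size_mib(ffn_down, 32, 22):.2f} MiB")    # 7.30 MiB (q5_0 fallback)
print(f"{quant_size_mib(ffn_down, 256, 144):.2f} MiB")  # 5.98 MiB (q4_K, ffn_gate/up)
print(f"{quant_size_mib(attn, 256, 144):.2f} MiB")      # 2.25 MiB (q4_K attention)
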
Q5_K_M_log.txt ADDED
@@ -0,0 +1,339 @@
1
+ main: build = 3906 (7eee341b)
2
+ main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
3
+ main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_Q5_K_M.gguf' as Q5_K_M
4
+ llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
5
+ llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
6
+ llama_model_loader: - kv 0: general.architecture str = llama
7
+ llama_model_loader: - kv 1: general.type str = model
8
+ llama_model_loader: - kv 2: general.size_label str = 2.3B
9
+ llama_model_loader: - kv 3: general.license str = apache-2.0
10
+ llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
11
+ llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
12
+ llama_model_loader: - kv 6: llama.block_count u32 = 24
13
+ llama_model_loader: - kv 7: llama.context_length u32 = 8192
14
+ llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
15
+ llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
16
+ llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
17
+ llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
18
+ llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
19
+ llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
20
+ llama_model_loader: - kv 14: general.file_type u32 = 32
21
+ llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
22
+ llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
23
+ llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
24
+ llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
25
+ llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
26
+ llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
27
+ llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
28
+ llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
29
+ llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
30
+ llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
31
+ llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
32
+ llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
33
+ llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
34
+ llama_model_loader: - kv 28: general.quantization_version u32 = 2
35
+ llama_model_loader: - type f32: 49 tensors
36
+ llama_model_loader: - type bf16: 170 tensors
37
+ ================================ Have weights data with 168 entries
38
+ [ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
39
+ [ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
40
+ ====== llama_model_quantize_internal: did not find weights for token_embd.weight
41
+ converting to q5_K .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
42
+ load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
43
+ prepare_imatrix: have 168 importance matrix entries
44
+ size = 1000.00 MiB -> 343.75 MiB
45
+ [ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
46
+ [ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
47
+
48
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
49
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
50
+ [ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
51
+ [ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
52
+ [ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
53
+ [ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
54
+ [ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
55
+ [ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
56
+ [ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
57
+ [ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
58
+ [ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
59
+
60
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
61
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
62
+ [ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
63
+ [ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
64
+ [ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
65
+ [ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
66
+ [ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
67
+ [ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
68
+ [ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
69
+ [ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
70
+ [ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
71
+
72
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
73
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
74
+ [ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
75
+ [ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
76
+ [ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
77
+ [ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
78
+ [ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
79
+ [ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
80
+ [ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
81
+ [ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
82
+ [ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
83
+
84
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
85
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
86
+ [ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
87
+ [ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
88
+ [ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
89
+ [ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
90
+ [ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
91
+ [ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
92
+ [ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
93
+ [ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
94
+ [ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
95
+
96
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
97
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
98
+ [ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
99
+ [ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
100
+ [ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
101
+ [ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
102
+ [ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
103
+ [ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
104
+ [ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
105
+ [ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
106
+ [ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
107
+
108
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
109
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
110
+ [ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
111
+ [ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
112
+ [ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
113
+ [ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
114
+ [ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
115
+ [ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
116
+ [ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
117
+ [ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
118
+ [ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
119
+
120
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
121
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
122
+ [ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
123
+ [ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
124
+ [ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
125
+ [ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
126
+ [ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
127
+ [ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
128
+ [ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
129
+ [ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
130
+ [ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
131
+
132
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
133
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
134
+ [ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
135
+ [ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
136
+ [ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
137
+ [ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
138
+ [ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
139
+ [ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
140
+ [ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
141
+ [ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
142
+ [ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
143
+
144
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
145
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
146
+ [ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
147
+ [ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
148
+ [ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
149
+ [ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
150
+ [ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
151
+ [ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
152
+ [ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
153
+ [ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
154
+ [ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
155
+
156
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
157
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
158
+ [ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
159
+ [ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
160
+ [ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
161
+ [ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
162
+ [ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
163
+ [ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
164
+ [ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
165
+ [ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
166
+ [ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
167
+
168
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
169
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
170
+ [ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
171
+ [ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
172
+ [ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
173
+ [ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
174
+ [ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
175
+ [ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
176
+ [ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
177
+ [ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
178
+ [ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
179
+
180
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
181
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
182
+ [ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
183
+ [ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
184
+ [ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
185
+ [ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
186
+ [ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
187
+ [ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
188
+ [ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
189
+ [ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
190
+ [ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
191
+
192
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
193
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
194
+ [ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
195
+ [ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
196
+ [ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
197
+ [ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
198
+ [ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
199
+ [ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
200
+ [ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
201
+ [ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
202
+ [ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
203
+
204
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
205
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
206
+ [ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
207
+ [ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
208
+ [ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
209
+ [ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
210
+ [ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
211
+ [ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
212
+ [ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
213
+ [ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
214
+ [ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
215
+
216
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
217
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
218
+ [ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
219
+ [ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
220
+ [ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
221
+ [ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
222
+ [ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
223
+ [ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
224
+ [ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
225
+ [ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
226
+ [ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
227
+
228
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
229
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
230
+ [ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
231
+ [ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
232
+ [ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
233
+ [ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
234
+ [ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
235
+ [ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
236
+ [ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
237
+ [ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
238
+ [ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
239
+
240
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
241
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
242
+ [ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
243
+ [ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
244
+ [ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
245
+ [ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
246
+ [ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
247
+ [ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
248
+ [ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
249
+ [ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
250
+ [ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
251
+
252
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
253
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
254
+ [ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
255
+ [ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
256
+ [ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
257
+ [ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
258
+ [ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
259
+ [ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
260
+ [ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
261
+ [ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
262
+ [ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
263
+
264
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
265
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
266
+ [ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
267
+ [ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
268
+ [ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
269
+ [ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
270
+ [ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
271
+ [ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
272
+ [ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
273
+ [ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
274
+ [ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
275
+
276
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
277
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
278
+ [ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
279
+ [ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
280
+ [ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
281
+ [ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
282
+ [ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
283
+ [ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
284
+ [ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
285
+ [ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
286
+ [ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
287
+
288
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
289
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
290
+ [ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
291
+ [ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
292
+ [ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
293
+ [ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
294
+ [ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
295
+ [ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
296
+ [ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
297
+ [ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
298
+ [ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
299
+
300
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
301
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
302
+ [ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
303
+ [ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
304
+ [ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
305
+ [ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
306
+ [ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
307
+ [ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
308
+ [ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
309
+ [ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
310
+ [ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
311
+
312
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
313
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
314
+ [ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
315
+ [ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
316
+ [ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
317
+ [ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
318
+ [ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
319
+ [ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
320
+ [ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
321
+ [ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
322
+ [ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
323
+
324
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
325
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
326
+ [ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
327
+ [ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
328
+ [ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
329
+ [ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
330
+ [ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
331
+ [ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
332
+ [ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
333
+ [ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
334
+ llama_model_quantize_internal: model size = 4298.38 MB
335
+ llama_model_quantize_internal: quant size = 2196.23 MB
336
+ llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization
337
+
338
+ main: quantize time = 9522.94 ms
339
+ main: total time = 9522.94 ms
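
Note: unlike the preceding q4_K run, the Q5_K_M recipe visible in this log mixes types per tensor (q6_K for attn_v and ffn_down on some layers, q5_K elsewhere, with q5_1/q8_0 fallbacks for the 5440-wide rows). A quick arithmetic sketch of the summary lines, using only numbers from the log: the average bits-per-weight comes out well above q5_K's nominal 5.5, apparently because the 1000 MB output.weight is kept in bf16 (the log shows no conversion for it).

# Sketch: sanity-check the Q5_K_M summary lines, using only log numbers.
MIB = 1024 * 1024
bf16_mib  = 4298.38   # "llama_model_quantize_internal: model size"
quant_mib = 2196.23   # "llama_model_quantize_internal: quant size"

n_weights = bf16_mib * MIB / 2          # bf16 = 2 bytes/weight -> ~2.25e9 weights
bpw = quant_mib * MIB * 8 / n_weights   # average bits per weight after quantizing

print(f"{bf16_mib / quant_mib:.2f}x")   # ~1.96x compression vs. bf16
print(f"{bpw:.2f} bits/weight")         # ~8.2, inflated by the bf16 output.weight
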
Q5_K_S_log.txt ADDED
@@ -0,0 +1,339 @@
1
+ main: build = 3906 (7eee341b)
2
+ main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
3
+ main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_Q5_K_S.gguf' as Q5_K_S
4
+ llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
5
+ llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
6
+ llama_model_loader: - kv 0: general.architecture str = llama
7
+ llama_model_loader: - kv 1: general.type str = model
8
+ llama_model_loader: - kv 2: general.size_label str = 2.3B
9
+ llama_model_loader: - kv 3: general.license str = apache-2.0
10
+ llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
11
+ llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
12
+ llama_model_loader: - kv 6: llama.block_count u32 = 24
13
+ llama_model_loader: - kv 7: llama.context_length u32 = 8192
14
+ llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
15
+ llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
16
+ llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
17
+ llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
18
+ llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
19
+ llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
20
+ llama_model_loader: - kv 14: general.file_type u32 = 32
21
+ llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
22
+ llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
23
+ llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
24
+ llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
25
+ llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
26
+ llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
27
+ llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
28
+ llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
29
+ llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
30
+ llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
31
+ llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
32
+ llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
33
+ llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
34
+ llama_model_loader: - kv 28: general.quantization_version u32 = 2
35
+ llama_model_loader: - type f32: 49 tensors
36
+ llama_model_loader: - type bf16: 170 tensors
37
+ ================================ Have weights data with 168 entries
38
+ [ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
39
+ [ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
40
+ ====== llama_model_quantize_internal: did not find weights for token_embd.weight
41
+ converting to q5_K .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
42
+ load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
43
+ prepare_imatrix: have 168 importance matrix entries
44
+ size = 1000.00 MiB -> 343.75 MiB
45
+ [ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
46
+ [ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
47
+
48
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
49
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
50
+ [ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
51
+ [ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
52
+ [ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
53
+ [ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
54
+ [ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
55
+ [ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
56
+ [ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
57
+ [ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
58
+ [ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
59
+
60
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
61
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
62
+ [ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
63
+ [ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
64
+ [ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
65
+ [ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
66
+ [ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
67
+ [ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
68
+ [ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
69
+ [ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
70
+ [ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
71
+
72
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
73
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
74
+ [ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
75
+ [ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
76
+ [ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
77
+ [ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
78
+ [ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
79
+ [ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
80
+ [ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
81
+ [ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
82
+ [ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
83
+
84
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
85
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
86
+ [ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
87
+ [ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
88
+ [ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
89
+ [ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
90
+ [ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
91
+ [ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
92
+ [ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
93
+ [ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
94
+ [ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
95
+
96
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
97
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
98
+ [ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
99
+ [ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
100
+ [ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
101
+ [ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
102
+ [ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
103
+ [ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
104
+ [ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
105
+ [ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
106
+ [ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
107
+
108
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
109
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
110
+ [ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
111
+ [ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
112
+ [ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
113
+ [ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
114
+ [ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
115
+ [ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
116
+ [ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
117
+ [ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
118
+ [ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
119
+
120
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
121
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
122
+ [ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
123
+ [ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
124
+ [ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
125
+ [ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
126
+ [ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
127
+ [ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
128
+ [ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
129
+ [ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
130
+ [ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
131
+
132
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
133
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
134
+ [ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
135
+ [ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
136
+ [ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
137
+ [ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
138
+ [ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
139
+ [ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
140
+ [ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
141
+ [ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
142
+ [ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
143
+
144
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
145
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
146
+ [ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
147
+ [ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
148
+ [ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
149
+ [ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
150
+ [ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
151
+ [ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
152
+ [ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
153
+ [ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
154
+ [ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
155
+
156
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
157
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
158
+ [ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
159
+ [ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
160
+ [ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
161
+ [ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
162
+ [ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
163
+ [ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
164
+ [ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
165
+ [ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
166
+ [ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
167
+
168
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
169
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
170
+ [ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
171
+ [ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
172
+ [ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
173
+ [ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
174
+ [ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
175
+ [ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
176
+ [ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
177
+ [ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
178
+ [ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
179
+
180
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
181
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
182
+ [ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
183
+ [ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
184
+ [ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
185
+ [ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
186
+ [ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
187
+ [ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
188
+ [ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
189
+ [ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
190
+ [ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
191
+
192
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
193
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
194
+ [ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
195
+ [ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
196
+ [ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
197
+ [ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
198
+ [ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
199
+ [ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
200
+ [ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
201
+ [ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
202
+ [ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
203
+
204
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
205
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
206
+ [ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
207
+ [ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
208
+ [ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
209
+ [ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
210
+ [ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
211
+ [ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
212
+ [ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
213
+ [ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
214
+ [ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
215
+
216
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
217
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
218
+ [ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
219
+ [ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
220
+ [ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
221
+ [ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
222
+ [ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
223
+ [ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
224
+ [ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
225
+ [ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
226
+ [ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
227
+
228
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
229
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
230
+ [ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
231
+ [ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
232
+ [ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
233
+ [ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
234
+ [ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
235
+ [ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
236
+ [ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
237
+ [ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
238
+ [ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
239
+
240
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
241
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
242
+ [ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
243
+ [ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
244
+ [ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
245
+ [ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
246
+ [ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
247
+ [ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
248
+ [ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
249
+ [ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
250
+ [ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
251
+
252
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
253
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
254
+ [ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
255
+ [ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
256
+ [ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
257
+ [ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
258
+ [ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
259
+ [ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
260
+ [ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
261
+ [ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
262
+ [ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
263
+
264
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
265
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
266
+ [ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
267
+ [ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
268
+ [ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
269
+ [ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
270
+ [ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
271
+ [ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
272
+ [ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
273
+ [ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
274
+ [ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
275
+
276
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
277
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
278
+ [ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
279
+ [ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
280
+ [ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
281
+ [ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
282
+ [ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
283
+ [ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
284
+ [ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
285
+ [ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
286
+ [ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
287
+
288
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
289
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
290
+ [ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
291
+ [ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
292
+ [ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
293
+ [ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
294
+ [ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
295
+ [ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
296
+ [ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
297
+ [ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
298
+ [ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
299
+
300
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
301
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
302
+ [ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
303
+ [ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
304
+ [ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
305
+ [ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
306
+ [ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
307
+ [ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
308
+ [ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
309
+ [ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
310
+ [ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
311
+
312
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
313
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
314
+ [ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
315
+ [ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
316
+ [ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
317
+ [ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
318
+ [ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
319
+ [ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
320
+ [ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
321
+ [ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
322
+ [ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
323
+
324
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
325
+ converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
326
+ [ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
327
+ [ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
328
+ [ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
329
+ [ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
330
+ [ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
331
+ [ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
332
+ [ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
333
+ [ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
334
+ llama_model_quantize_internal: model size = 4298.38 MB
335
+ llama_model_quantize_internal: quant size = 2150.01 MB
336
+ llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization
337
+
338
+ main: quantize time = 10361.94 ms
339
+ main: total time = 10361.94 ms
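
Note on the recurring fallback in the q5_K log above: k-quants pack weights into 256-wide super-blocks, so llama.cpp requires each tensor row to be divisible by 256. The ffn_down rows are 5440 wide (5440 mod 256 = 64), so all 24 of them — one per block, matching the "24 of 169" warning — drop to the legacy q5_1 format. The sketch below is illustrative Python, not llama.cpp code; the block byte sizes (176 bytes per 256 weights for q5_K, 24 bytes per 32 for q5_1) are assumptions taken from the standard GGML layouts, and with them the per-tensor sizes in the log fall out directly.

# Illustrative only -- reproduces the size arithmetic in the log from assumed
# GGML block layouts: q5_K = 176 bytes / 256 weights (5.5 bpw),
# q5_1 = 24 bytes / 32 weights (6.0 bpw), bf16 = 16 bpw.
BPW = {"bf16": 16.0, "q5_K": 176 * 8 / 256, "q5_1": 24 * 8 / 32}

def pick_type(row_size: int, wanted: str = "q5_K") -> str:
    # k-quants need row_size % 256 == 0; otherwise fall back to a 32-wide format
    return wanted if row_size % 256 == 0 else "q5_1"

def mib(n_weights: int, qtype: str) -> float:
    # size in MiB of n_weights weights stored at the given quantization
    return n_weights * BPW[qtype] / 8 / 2**20

print(pick_type(5440))                     # q5_1 (5440 % 256 == 64)
print(f"{mib(5440 * 2048, 'q5_1'):.2f}")   # 7.97  -> ffn_down fallback
print(f"{mib(5440 * 2048, 'q5_K'):.2f}")   # 7.30  -> ffn_gate / ffn_up
print(f"{mib(2048 * 2048, 'q5_K'):.2f}")   # 2.75  -> attention weights
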
Q6_K_log.txt ADDED
@@ -0,0 +1,339 @@
1
+ main: build = 3906 (7eee341b)
2
+ main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
3
+ main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_Q6_K.gguf' as Q6_K
4
+ llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
5
+ llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
6
+ llama_model_loader: - kv 0: general.architecture str = llama
7
+ llama_model_loader: - kv 1: general.type str = model
8
+ llama_model_loader: - kv 2: general.size_label str = 2.3B
9
+ llama_model_loader: - kv 3: general.license str = apache-2.0
10
+ llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
11
+ llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
12
+ llama_model_loader: - kv 6: llama.block_count u32 = 24
13
+ llama_model_loader: - kv 7: llama.context_length u32 = 8192
14
+ llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
15
+ llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
16
+ llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
17
+ llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
18
+ llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
19
+ llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
20
+ llama_model_loader: - kv 14: general.file_type u32 = 32
21
+ llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
22
+ llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
23
+ llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
24
+ llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
25
+ llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
26
+ llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
27
+ llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
28
+ llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
29
+ llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
30
+ llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
31
+ llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
32
+ llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
33
+ llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
34
+ llama_model_loader: - kv 28: general.quantization_version u32 = 2
35
+ llama_model_loader: - type f32: 49 tensors
36
+ llama_model_loader: - type bf16: 170 tensors
37
+ ================================ Have weights data with 168 entries
38
+ [ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
39
+ [ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
40
+ ====== llama_model_quantize_internal: did not find weights for token_embd.weight
41
+ converting to q6_K .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
42
+ load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
43
+ prepare_imatrix: have 168 importance matrix entries
44
+ size = 1000.00 MiB -> 410.16 MiB
45
+ [ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
46
+ [ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
47
+
48
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
49
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
50
+ [ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
51
+ [ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
52
+ [ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
53
+ [ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
54
+ [ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
55
+ [ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
56
+ [ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
57
+ [ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
58
+ [ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
59
+
60
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
61
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
62
+ [ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
63
+ [ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
64
+ [ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
65
+ [ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
66
+ [ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
67
+ [ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
68
+ [ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
69
+ [ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
70
+ [ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
71
+
72
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
73
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
74
+ [ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
75
+ [ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
76
+ [ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
77
+ [ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
78
+ [ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
79
+ [ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
80
+ [ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
81
+ [ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
82
+ [ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
83
+
84
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
85
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
86
+ [ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
87
+ [ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
88
+ [ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
89
+ [ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
90
+ [ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
91
+ [ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
92
+ [ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
93
+ [ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
94
+ [ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
95
+
96
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
97
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
98
+ [ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
99
+ [ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
100
+ [ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
101
+ [ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
102
+ [ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
103
+ [ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
104
+ [ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
105
+ [ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
106
+ [ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
107
+
108
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
109
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
110
+ [ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
111
+ [ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
112
+ [ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
113
+ [ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
114
+ [ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
115
+ [ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
116
+ [ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
117
+ [ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
118
+ [ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
119
+
120
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
121
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
122
+ [ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
123
+ [ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
124
+ [ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
125
+ [ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
126
+ [ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
127
+ [ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
128
+ [ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
129
+ [ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
130
+ [ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
131
+
132
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
133
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
134
+ [ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
135
+ [ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
136
+ [ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
137
+ [ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
138
+ [ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
139
+ [ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
140
+ [ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
141
+ [ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
142
+ [ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
143
+
144
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
145
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
146
+ [ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
147
+ [ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
148
+ [ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
149
+ [ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
150
+ [ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
151
+ [ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
152
+ [ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
153
+ [ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
154
+ [ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
155
+
156
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
157
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
158
+ [ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
159
+ [ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
160
+ [ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
161
+ [ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
162
+ [ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
163
+ [ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
164
+ [ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
165
+ [ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
166
+ [ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
167
+
168
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
169
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
170
+ [ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
171
+ [ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
172
+ [ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
173
+ [ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
174
+ [ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
175
+ [ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
176
+ [ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
177
+ [ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
178
+ [ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
179
+
180
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
181
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
182
+ [ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
183
+ [ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
184
+ [ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
185
+ [ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
186
+ [ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
187
+ [ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
188
+ [ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
189
+ [ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
190
+ [ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
191
+
192
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
193
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
194
+ [ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
195
+ [ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
196
+ [ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
197
+ [ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
198
+ [ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
199
+ [ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
200
+ [ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
201
+ [ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
202
+ [ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
203
+
204
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
205
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
206
+ [ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
207
+ [ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
208
+ [ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
209
+ [ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
210
+ [ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
211
+ [ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
212
+ [ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
213
+ [ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
214
+ [ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
215
+
216
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
217
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
218
+ [ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
219
+ [ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
220
+ [ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
221
+ [ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
222
+ [ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
223
+ [ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
224
+ [ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
225
+ [ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
226
+ [ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
227
+
228
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
229
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
230
+ [ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
231
+ [ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
232
+ [ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
233
+ [ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
234
+ [ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
235
+ [ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
236
+ [ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
237
+ [ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
238
+ [ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
239
+
240
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
241
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
242
+ [ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
243
+ [ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
244
+ [ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
245
+ [ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
246
+ [ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
247
+ [ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
248
+ [ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
249
+ [ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
250
+ [ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
251
+
252
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
253
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
254
+ [ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
255
+ [ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
256
+ [ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
257
+ [ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
258
+ [ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
259
+ [ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
260
+ [ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
261
+ [ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
262
+ [ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
263
+
264
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
265
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
266
+ [ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
267
+ [ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
268
+ [ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
269
+ [ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
270
+ [ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
271
+ [ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
272
+ [ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
273
+ [ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
274
+ [ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
275
+
276
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
277
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
278
+ [ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
279
+ [ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
280
+ [ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
281
+ [ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
282
+ [ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
283
+ [ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
284
+ [ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
285
+ [ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
286
+ [ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
287
+
288
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
289
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
290
+ [ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
291
+ [ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
292
+ [ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
293
+ [ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
294
+ [ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
295
+ [ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
296
+ [ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
297
+ [ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
298
+ [ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
299
+
300
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
301
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
302
+ [ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
303
+ [ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
304
+ [ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
305
+ [ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
306
+ [ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
307
+ [ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
308
+ [ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
309
+ [ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
310
+ [ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
311
+
312
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
313
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
314
+ [ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
315
+ [ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
316
+ [ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
317
+ [ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
318
+ [ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
319
+ [ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
320
+ [ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
321
+ [ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
322
+ [ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
323
+
324
+ llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
325
+ converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
326
+ [ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
327
+ [ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
328
+ [ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
329
+ [ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
330
+ [ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
331
+ [ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
332
+ [ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
333
+ [ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
334
+ llama_model_quantize_internal: model size = 4298.38 MB
335
+ llama_model_quantize_internal: quant size = 2414.84 MB
336
+ llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization
337
+
338
+ main: quantize time = 4934.86 ms
339
+ main: total time = 4934.86 ms
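
The Q6_K run above trips over the same 24 ffn_down tensors, but its fallback target is q8_0 rather than q5_1, and its per-weight rates are higher across the board — consistent with the 2414.84 MB result against 2150.01 MB for the q5_K run despite an identical warning count. Extending the sketch above with the Q6_K run's formats (again assumed GGML layouts: 210 bytes per 256 weights for q6_K, 34 bytes per 32 for q8_0) reproduces the logged per-tensor sizes. In the Q8_0 log that follows, no fallback appears at all, since q8_0's 32-wide blocks divide 5440 evenly.

# Same illustrative arithmetic with the Q6_K run's assumed block layouts:
# q6_K = 210 bytes / 256 weights (6.5625 bpw), q8_0 = 34 bytes / 32 (8.5 bpw).
BPW = {"q6_K": 210 * 8 / 256, "q8_0": 34 * 8 / 32}

mib = lambda n, qtype: n * BPW[qtype] / 8 / 2**20

print(f"{mib(5440 * 2048, 'q8_0'):.2f}")     # 11.29  -> ffn_down fallback
print(f"{mib(5440 * 2048, 'q6_K'):.2f}")     # 8.72   -> ffn_gate / ffn_up
print(f"{mib(2048 * 2048, 'q6_K'):.2f}")     # 3.28   -> attention weights
print(f"{mib(2048 * 256000, 'q6_K'):.2f}")   # 410.16 -> token_embd
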
Q8_0_log.txt ADDED
@@ -0,0 +1,266 @@
1
+ main: build = 3906 (7eee341b)
2
+ main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
3
+ main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_Q8_0.gguf' as Q8_0
4
+ llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
5
+ llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
6
+ llama_model_loader: - kv 0: general.architecture str = llama
7
+ llama_model_loader: - kv 1: general.type str = model
8
+ llama_model_loader: - kv 2: general.size_label str = 2.3B
9
+ llama_model_loader: - kv 3: general.license str = apache-2.0
10
+ llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
11
+ llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
12
+ llama_model_loader: - kv 6: llama.block_count u32 = 24
13
+ llama_model_loader: - kv 7: llama.context_length u32 = 8192
14
+ llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
15
+ llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
16
+ llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
17
+ llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
18
+ llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
19
+ llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
20
+ llama_model_loader: - kv 14: general.file_type u32 = 32
21
+ llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
22
+ llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
23
+ llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
24
+ llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
25
+ llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
26
+ llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
27
+ llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
28
+ llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
29
+ llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
30
+ llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
31
+ llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
32
+ llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
33
+ llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
34
+ llama_model_loader: - kv 28: general.quantization_version u32 = 2
35
+ llama_model_loader: - type f32: 49 tensors
36
+ llama_model_loader: - type bf16: 170 tensors
37
+ ================================ Have weights data with 168 entries
38
+ [ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
39
+ [ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
40
+ ====== llama_model_quantize_internal: did not find weights for token_embd.weight
41
+ converting to q8_0 .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
42
+ load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
43
+ prepare_imatrix: have 168 importance matrix entries
44
+ size = 1000.00 MiB -> 531.25 MiB
45
+ [ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
46
+ [ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
47
+ [ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
48
+ [ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
49
+ [ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
50
+ [ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
51
+ [ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
52
+ [ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
53
+ [ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
54
+ [ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
55
+ [ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
56
+ [ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
57
+ [ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
58
+ [ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
59
+ [ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
60
+ [ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
61
+ [ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
62
+ [ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
63
+ [ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
64
+ [ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
65
+ [ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
66
+ [ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
67
+ [ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
68
+ [ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
69
+ [ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
70
+ [ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
71
+ [ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
72
+ [ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
73
+ [ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
74
+ [ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
75
+ [ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
76
+ [ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
77
+ [ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
78
+ [ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
79
+ [ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
80
+ [ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
81
+ [ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
82
+ [ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
83
+ [ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
84
+ [ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
85
+ [ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
86
+ [ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
87
+ [ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
88
+ [ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
89
+ [ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
90
+ [ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
91
+ [ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
92
+ [ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
93
+ [ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
94
+ [ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
95
+ [ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
96
+ [ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
97
+ [ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
98
+ [ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
99
+ [ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
100
+ [ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
101
+ [ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
102
+ [ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
103
+ [ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
104
+ [ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
105
+ [ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
106
+ [ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
107
+ [ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
108
+ [ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
109
+ [ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
110
+ [ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
111
+ [ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
112
+ [ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
113
+ [ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
114
+ [ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
115
+ [ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
116
+ [ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
117
+ [ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
118
+ [ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
119
+ [ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
120
+ [ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
121
+ [ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
122
+ [ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
123
+ [ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
124
+ [ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
125
+ [ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
126
+ [ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
127
+ [ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
128
+ [ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
129
+ [ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
130
+ [ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
131
+ [ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
132
+ [ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
133
+ [ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
134
+ [ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
135
+ [ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
136
+ [ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
137
+ [ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
138
+ [ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
139
+ [ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
140
+ [ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
141
+ [ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
142
+ [ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
143
+ [ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
144
+ [ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
145
+ [ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
146
+ [ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
147
+ [ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
148
+ [ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
149
+ [ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
150
+ [ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
151
+ [ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
152
+ [ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
153
+ [ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
154
+ [ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
155
+ [ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
156
+ [ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
157
+ [ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
158
+ [ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
159
+ [ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
160
+ [ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
161
+ [ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
162
+ [ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
163
+ [ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
164
+ [ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
165
+ [ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
166
+ [ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
167
+ [ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
168
+ [ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
169
+ [ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
170
+ [ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
171
+ [ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
172
+ [ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
173
+ [ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
174
+ [ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
175
+ [ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
176
+ [ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
177
+ [ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
178
+ [ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
179
+ [ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
180
+ [ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
181
+ [ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
182
+ [ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
183
+ [ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
184
+ [ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
185
+ [ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
186
+ [ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
187
+ [ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
188
+ [ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
189
+ [ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
190
+ [ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
191
+ [ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
192
+ [ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
193
+ [ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
194
+ [ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
195
+ [ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
196
+ [ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
197
+ [ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
198
+ [ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
199
+ [ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
200
+ [ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
201
+ [ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
202
+ [ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
203
+ [ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
204
+ [ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
205
+ [ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
206
+ [ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
207
+ [ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
208
+ [ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
209
+ [ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
210
+ [ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
211
+ [ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
212
+ [ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
213
+ [ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
214
+ [ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
215
+ [ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
216
+ [ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
217
+ [ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
218
+ [ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
219
+ [ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
220
+ [ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
221
+ [ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
222
+ [ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
223
+ [ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
224
+ [ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
225
+ [ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
226
+ [ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
227
+ [ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
228
+ [ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
229
+ [ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
230
+ [ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
231
+ [ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
232
+ [ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
233
+ [ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
234
+ [ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
235
+ [ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
236
+ [ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
237
+ [ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
238
+ [ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
239
+ [ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
240
+ [ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
241
+ [ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
242
+ [ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
243
+ [ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
244
+ [ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
245
+ [ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
246
+ [ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
247
+ [ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
248
+ [ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
249
+ [ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
250
+ [ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
251
+ [ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
252
+ [ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
253
+ [ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
254
+ [ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
255
+ [ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
256
+ [ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
257
+ [ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
258
+ [ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
259
+ [ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
260
+ [ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
261
+ [ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
262
+ llama_model_quantize_internal: model size = 4298.38 MB
263
+ llama_model_quantize_internal: quant size = 2752.45 MB
264
+
265
+ main: quantize time = 3216.17 ms
266
+ main: total time = 3216.17 ms
README.md CHANGED
@@ -27,7 +27,7 @@ language:
27
  - mt
28
  - nl
29
  - nn
30
- - no
31
  - oc
32
  - pl
33
  - pt
@@ -41,6 +41,59 @@ language:
41
  - uk
42
  ---
43
 
44
  ![](./images/salamandra_header.png)
45
 
46
  # Salamandra Model Card
 
27
  - mt
28
  - nl
29
  - nn
30
+ - \no
31
  - oc
32
  - pl
33
  - pt
 
41
  - uk
42
  ---
43
 
44
+
45
+ # **Quantization Summary**
46
+
47
+ - **IQ4_NL**: Best of the I-quants below **Q4**, with minimal PPL impact.
48
+ - **Q5_K_M**: Excellent balance above **Q4**, recommended for most applications.
49
+ - **Q6_K**: Provides near-**bf16** performance with size savings.
50
+
51
+ ---
52
+
53
+ # Quantization
54
+
55
+ ### **Perplexity Comparison Table:**
56
+
57
+ | **Quantization Type** | **PPL** | **ln(PPL(Q)/PPL(bf16))** | **File Size** | **Notes** |
58
+ |-----------------------|------------|--------------------------|---------------|----------------------------------------------------------------|
59
+ | **IQ3_M** | 15.1995 | 0.079131 | 1.7G | Good size efficiency with acceptable PPL increase |
60
+ | **Q3_K_L** | 15.0444 | 0.068875 | 1.8G | Further size reduction with modest PPL increase |
61
+ | **IQ4_NL** | 14.5534 | 0.035693 | 1.9G | Good size reduction with minimal PPL impact (**recommended**) |
62
+ | **Q4_K_M** | 14.3990 | 0.025028 | 2.0G | Smaller with acceptable PPL |
63
+ | **Q5_K_M** | 14.1299 | 0.006162 | 2.2G | Excellent balance of PPL and size (**recommended**) |
64
+ | **Q6_K** | 14.0675 | 0.001736 | 2.4G | Nearly lossless performance with reduced size |
65
+ | **bf16** | 14.0431 | 0.0 | 4.2G | Baseline |
66
+
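The third column is the natural log of each quant's perplexity over the bf16 baseline; since PPL = exp(mean NLL), this is also the mean increase in per-token negative log-likelihood. A quick check against the table (values copied from above):

```python
import math

PPL_BF16 = 14.0431  # baseline from the table
ppl = {"IQ3_M": 15.1995, "Q3_K_L": 15.0444, "IQ4_NL": 14.5534,
       "Q4_K_M": 14.3990, "Q5_K_M": 14.1299, "Q6_K": 14.0675}

for name, p in ppl.items():
    # ln(PPL(Q)/PPL(bf16)) -- the "log PPL diff" used in the selection criteria
    print(f"{name}: {math.log(p / PPL_BF16):.6f}")
# IQ3_M: 0.079131, Q3_K_L: 0.068875, IQ4_NL: 0.035693,
# Q4_K_M: 0.025028, Q5_K_M: 0.006162, Q6_K: 0.001736
```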
67
+ ---
68
+
69
+ ### **Notes:**
70
+
71
+ - **Recommended Quantizations:**
72
+ - **IQ4_NL**: Represents the best of the I quantization types below **Q4**, achieving good size efficiency while maintaining low perplexity.
73
+ - **Q5_K_M**: Offers the best balance between low perplexity and reduced file size above **Q4**, making it ideal for most applications.
74
+ - **Q6_K**: Delivers nearly lossless performance compared to **bf16** with a reduced file size (2.4G vs. 4.2G). Ideal for scenarios requiring maximum accuracy with some size savings.
75
+ - **Non-recommended Quantizations:**
76
+ - **IQ3_M**: Offers a smaller file size (1.7G) with an acceptable PPL (15.1995), making it a solid choice for highly compressed models.
77
+ - **Q3_K_L**: Provides a slightly larger file size (1.8G) with an even better PPL (15.0444), and fits the selection criteria for highly compressed models (log PPL diff < 0.3).
78
+ - **Q4_K_M**: Although not designated as "recommended" here, it is highly suitable for back-ends like **Metal** that run **I-quant** models slowly; for such back-ends, **Q4_K_M** remains an excellent choice.
79
+ - **Q8_0**: Offers perplexity very close to **bf16**, much like **Q6_K**, but at a larger file size (2.7G vs. 2.4G). **Q6_K** was selected instead because it is nearly lossless and under 2.5GB, making it the better size-to-performance trade-off.
80
+
81
+ ---
82
+
83
+ ### **Defending the Selection:**
84
+
85
+ The selection of recommended models is designed to provide a spectrum of options that meet the following criteria:
86
+
87
+ - **Diversity in Quantization Types:**
88
+ - **I Quantization Below Q4:** **IQ4_NL** is included to offer an option that uses I quantization below the **Q4** level, balancing size and performance.
89
+ - **K Quantization At and Above Q4:** **Q4_K_M**, **Q5_K_M**, and **Q6_K** provide K quantization options at and above the **Q4** level, giving users choices based on their specific needs.
90
+ - **Highly Compressed Quantization (Q3 and below):** **IQ3_M** and **Q3_K_L** are included as they meet the selection criteria of log PPL diff <0.3 and are not redundant with other models.
91
+
92
+ - **Selection Criteria:**
93
+ - **Log PPL diff <0.3:** All included models have a log PPL difference under 0.3, ensuring that they maintain acceptable performance even when highly quantized.
94
+ - **No Multiple Models Within 100 MB of the Same File Size:** Only one model is included per similar file-size range to avoid redundancy. For example, **Q3_K_L** (1.8G) is included while models like **IQ3_XS** (1.7G) are excluded due to overlapping file sizes and comparable PPL, ensuring a sparse yet comprehensive selection (both criteria are sketched in code below).
95
+
96
+
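Both criteria are mechanical enough to express in a few lines. A hypothetical sketch of the filter (the IQ3_XS perplexity is a placeholder, since the table above only notes that it overlaps IQ3_M in size with comparable PPL):

```python
import math

PPL_BF16 = 14.0431
# (name, PPL, approx. file size in MB); IQ3_XS's PPL is illustrative only.
candidates = [("IQ3_XS", 15.25, 1700), ("IQ3_M", 15.1995, 1700),
              ("Q3_K_L", 15.0444, 1800), ("IQ4_NL", 14.5534, 1900),
              ("Q4_K_M", 14.3990, 2000), ("Q5_K_M", 14.1299, 2200),
              ("Q6_K", 14.0675, 2400)]

selected = []
for name, ppl, size in sorted(candidates, key=lambda c: c[1]):  # best PPL first
    if math.log(ppl / PPL_BF16) >= 0.3:
        continue  # criterion 1: keep only log PPL diff < 0.3
    if any(abs(size - kept) < 100 for _, _, kept in selected):
        continue  # criterion 2: drop near-duplicates within 100 MB
    selected.append((name, ppl, size))

print([n for n, _, _ in selected])
# ['Q6_K', 'Q5_K_M', 'Q4_K_M', 'IQ4_NL', 'Q3_K_L', 'IQ3_M'] -- IQ3_XS is dropped
```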
97
  ![](./images/salamandra_header.png)
98
 
99
  # Salamandra Model Card
bf16_log.txt ADDED
@@ -0,0 +1,245 @@
1
+ /Users/Shared/Public/Github/llama.cpp/convert_hf_to_gguf.py --outtype bf16 . --outfile ./salamandra-2b_bf16.gguf
2
+ INFO:hf-to-gguf:Loading model:
3
+ INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only
4
+ INFO:hf-to-gguf:Exporting model...
5
+ INFO:hf-to-gguf:gguf: loading model part 'model.safetensors'
6
+ INFO:hf-to-gguf:output.weight, torch.bfloat16 --> BF16, shape = {2048, 256000}
7
+ INFO:hf-to-gguf:token_embd.weight, torch.bfloat16 --> BF16, shape = {2048, 256000}
8
+ INFO:hf-to-gguf:blk.0.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
9
+ INFO:hf-to-gguf:blk.0.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
10
+ INFO:hf-to-gguf:blk.0.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
11
+ INFO:hf-to-gguf:blk.0.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
12
+ INFO:hf-to-gguf:blk.0.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
13
+ INFO:hf-to-gguf:blk.0.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
14
+ INFO:hf-to-gguf:blk.0.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
15
+ INFO:hf-to-gguf:blk.0.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
16
+ INFO:hf-to-gguf:blk.0.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
17
+ INFO:hf-to-gguf:blk.1.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
18
+ INFO:hf-to-gguf:blk.1.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
19
+ INFO:hf-to-gguf:blk.1.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
20
+ INFO:hf-to-gguf:blk.1.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
21
+ INFO:hf-to-gguf:blk.1.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
22
+ INFO:hf-to-gguf:blk.1.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
23
+ INFO:hf-to-gguf:blk.1.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
24
+ INFO:hf-to-gguf:blk.1.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
25
+ INFO:hf-to-gguf:blk.1.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
26
+ INFO:hf-to-gguf:blk.10.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
27
+ INFO:hf-to-gguf:blk.10.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
28
+ INFO:hf-to-gguf:blk.10.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
29
+ INFO:hf-to-gguf:blk.10.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
30
+ INFO:hf-to-gguf:blk.10.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
31
+ INFO:hf-to-gguf:blk.10.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
32
+ INFO:hf-to-gguf:blk.10.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
33
+ INFO:hf-to-gguf:blk.10.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
34
+ INFO:hf-to-gguf:blk.10.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
35
+ INFO:hf-to-gguf:blk.11.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
36
+ INFO:hf-to-gguf:blk.11.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
37
+ INFO:hf-to-gguf:blk.11.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
38
+ INFO:hf-to-gguf:blk.11.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
39
+ INFO:hf-to-gguf:blk.11.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
40
+ INFO:hf-to-gguf:blk.11.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
41
+ INFO:hf-to-gguf:blk.11.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
42
+ INFO:hf-to-gguf:blk.11.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
43
+ INFO:hf-to-gguf:blk.11.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
44
+ INFO:hf-to-gguf:blk.12.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
45
+ INFO:hf-to-gguf:blk.12.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
46
+ INFO:hf-to-gguf:blk.12.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
47
+ INFO:hf-to-gguf:blk.12.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
48
+ INFO:hf-to-gguf:blk.12.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
49
+ INFO:hf-to-gguf:blk.12.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
50
+ INFO:hf-to-gguf:blk.12.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
51
+ INFO:hf-to-gguf:blk.12.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
52
+ INFO:hf-to-gguf:blk.12.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
53
+ INFO:hf-to-gguf:blk.13.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
54
+ INFO:hf-to-gguf:blk.13.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
55
+ INFO:hf-to-gguf:blk.13.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
56
+ INFO:hf-to-gguf:blk.13.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
57
+ INFO:hf-to-gguf:blk.13.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
58
+ INFO:hf-to-gguf:blk.13.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
59
+ INFO:hf-to-gguf:blk.13.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
60
+ INFO:hf-to-gguf:blk.13.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
61
+ INFO:hf-to-gguf:blk.13.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
62
+ INFO:hf-to-gguf:blk.14.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
63
+ INFO:hf-to-gguf:blk.14.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
64
+ INFO:hf-to-gguf:blk.14.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
65
+ INFO:hf-to-gguf:blk.14.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
66
+ INFO:hf-to-gguf:blk.14.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
67
+ INFO:hf-to-gguf:blk.14.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
68
+ INFO:hf-to-gguf:blk.14.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
69
+ INFO:hf-to-gguf:blk.14.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
70
+ INFO:hf-to-gguf:blk.14.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
71
+ INFO:hf-to-gguf:blk.15.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
72
+ INFO:hf-to-gguf:blk.15.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
73
+ INFO:hf-to-gguf:blk.15.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
74
+ INFO:hf-to-gguf:blk.15.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
75
+ INFO:hf-to-gguf:blk.15.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
76
+ INFO:hf-to-gguf:blk.15.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
77
+ INFO:hf-to-gguf:blk.15.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
78
+ INFO:hf-to-gguf:blk.15.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
79
+ INFO:hf-to-gguf:blk.15.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
80
+ INFO:hf-to-gguf:blk.16.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
81
+ INFO:hf-to-gguf:blk.16.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
82
+ INFO:hf-to-gguf:blk.16.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
83
+ INFO:hf-to-gguf:blk.16.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
84
+ INFO:hf-to-gguf:blk.16.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
85
+ INFO:hf-to-gguf:blk.16.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
86
+ INFO:hf-to-gguf:blk.16.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
87
+ INFO:hf-to-gguf:blk.16.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
88
+ INFO:hf-to-gguf:blk.16.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
89
+ INFO:hf-to-gguf:blk.17.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
90
+ INFO:hf-to-gguf:blk.17.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
91
+ INFO:hf-to-gguf:blk.17.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
92
+ INFO:hf-to-gguf:blk.17.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
93
+ INFO:hf-to-gguf:blk.17.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
94
+ INFO:hf-to-gguf:blk.17.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
95
+ INFO:hf-to-gguf:blk.17.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
96
+ INFO:hf-to-gguf:blk.17.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
97
+ INFO:hf-to-gguf:blk.17.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
98
+ INFO:hf-to-gguf:blk.18.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
99
+ INFO:hf-to-gguf:blk.18.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
100
+ INFO:hf-to-gguf:blk.18.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
101
+ INFO:hf-to-gguf:blk.18.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
102
+ INFO:hf-to-gguf:blk.18.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
103
+ INFO:hf-to-gguf:blk.18.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
104
+ INFO:hf-to-gguf:blk.18.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
105
+ INFO:hf-to-gguf:blk.18.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
106
+ INFO:hf-to-gguf:blk.18.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
107
+ INFO:hf-to-gguf:blk.19.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
108
+ INFO:hf-to-gguf:blk.19.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
109
+ INFO:hf-to-gguf:blk.19.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
110
+ INFO:hf-to-gguf:blk.19.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
111
+ INFO:hf-to-gguf:blk.19.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
112
+ INFO:hf-to-gguf:blk.19.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
113
+ INFO:hf-to-gguf:blk.19.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
114
+ INFO:hf-to-gguf:blk.19.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
115
+ INFO:hf-to-gguf:blk.19.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
116
+ INFO:hf-to-gguf:blk.2.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
117
+ INFO:hf-to-gguf:blk.2.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
118
+ INFO:hf-to-gguf:blk.2.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
119
+ INFO:hf-to-gguf:blk.2.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
120
+ INFO:hf-to-gguf:blk.2.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
121
+ INFO:hf-to-gguf:blk.2.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
122
+ INFO:hf-to-gguf:blk.2.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
123
+ INFO:hf-to-gguf:blk.2.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
124
+ INFO:hf-to-gguf:blk.2.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
125
+ INFO:hf-to-gguf:blk.20.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
126
+ INFO:hf-to-gguf:blk.20.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
127
+ INFO:hf-to-gguf:blk.20.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
128
+ INFO:hf-to-gguf:blk.20.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
129
+ INFO:hf-to-gguf:blk.20.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
130
+ INFO:hf-to-gguf:blk.20.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
131
+ INFO:hf-to-gguf:blk.20.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
132
+ INFO:hf-to-gguf:blk.20.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
133
+ INFO:hf-to-gguf:blk.20.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
134
+ INFO:hf-to-gguf:blk.21.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
135
+ INFO:hf-to-gguf:blk.21.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
136
+ INFO:hf-to-gguf:blk.21.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
137
+ INFO:hf-to-gguf:blk.21.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
138
+ INFO:hf-to-gguf:blk.21.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
139
+ INFO:hf-to-gguf:blk.21.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
140
+ INFO:hf-to-gguf:blk.21.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
141
+ INFO:hf-to-gguf:blk.21.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
142
+ INFO:hf-to-gguf:blk.21.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
143
+ INFO:hf-to-gguf:blk.22.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
144
+ INFO:hf-to-gguf:blk.22.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
145
+ INFO:hf-to-gguf:blk.22.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
146
+ INFO:hf-to-gguf:blk.22.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
147
+ INFO:hf-to-gguf:blk.22.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
148
+ INFO:hf-to-gguf:blk.22.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
149
+ INFO:hf-to-gguf:blk.22.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
150
+ INFO:hf-to-gguf:blk.22.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
151
+ INFO:hf-to-gguf:blk.22.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
152
+ INFO:hf-to-gguf:blk.23.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
153
+ INFO:hf-to-gguf:blk.23.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
154
+ INFO:hf-to-gguf:blk.23.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
155
+ INFO:hf-to-gguf:blk.23.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
156
+ INFO:hf-to-gguf:blk.23.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
157
+ INFO:hf-to-gguf:blk.23.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
158
+ INFO:hf-to-gguf:blk.23.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
159
+ INFO:hf-to-gguf:blk.23.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
160
+ INFO:hf-to-gguf:blk.23.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
161
+ INFO:hf-to-gguf:blk.3.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
162
+ INFO:hf-to-gguf:blk.3.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
163
+ INFO:hf-to-gguf:blk.3.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
164
+ INFO:hf-to-gguf:blk.3.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
165
+ INFO:hf-to-gguf:blk.3.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
166
+ INFO:hf-to-gguf:blk.3.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
167
+ INFO:hf-to-gguf:blk.3.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
168
+ INFO:hf-to-gguf:blk.3.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
169
+ INFO:hf-to-gguf:blk.3.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
170
+ INFO:hf-to-gguf:blk.4.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
171
+ INFO:hf-to-gguf:blk.4.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
172
+ INFO:hf-to-gguf:blk.4.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
173
+ INFO:hf-to-gguf:blk.4.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
174
+ INFO:hf-to-gguf:blk.4.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
175
+ INFO:hf-to-gguf:blk.4.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
176
+ INFO:hf-to-gguf:blk.4.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
177
+ INFO:hf-to-gguf:blk.4.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
178
+ INFO:hf-to-gguf:blk.4.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
179
+ INFO:hf-to-gguf:blk.5.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
180
+ INFO:hf-to-gguf:blk.5.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
181
+ INFO:hf-to-gguf:blk.5.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
182
+ INFO:hf-to-gguf:blk.5.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
183
+ INFO:hf-to-gguf:blk.5.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
184
+ INFO:hf-to-gguf:blk.5.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
185
+ INFO:hf-to-gguf:blk.5.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
186
+ INFO:hf-to-gguf:blk.5.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
187
+ INFO:hf-to-gguf:blk.5.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
188
+ INFO:hf-to-gguf:blk.6.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
189
+ INFO:hf-to-gguf:blk.6.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
190
+ INFO:hf-to-gguf:blk.6.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
191
+ INFO:hf-to-gguf:blk.6.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
192
+ INFO:hf-to-gguf:blk.6.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
193
+ INFO:hf-to-gguf:blk.6.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
194
+ INFO:hf-to-gguf:blk.6.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
195
+ INFO:hf-to-gguf:blk.6.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
196
+ INFO:hf-to-gguf:blk.6.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
197
+ INFO:hf-to-gguf:blk.7.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
198
+ INFO:hf-to-gguf:blk.7.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
199
+ INFO:hf-to-gguf:blk.7.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
200
+ INFO:hf-to-gguf:blk.7.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
201
+ INFO:hf-to-gguf:blk.7.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
202
+ INFO:hf-to-gguf:blk.7.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
203
+ INFO:hf-to-gguf:blk.7.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
204
+ INFO:hf-to-gguf:blk.7.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
205
+ INFO:hf-to-gguf:blk.7.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
206
+ INFO:hf-to-gguf:blk.8.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
207
+ INFO:hf-to-gguf:blk.8.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
208
+ INFO:hf-to-gguf:blk.8.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
209
+ INFO:hf-to-gguf:blk.8.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
210
+ INFO:hf-to-gguf:blk.8.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
211
+ INFO:hf-to-gguf:blk.8.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
212
+ INFO:hf-to-gguf:blk.8.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
213
+ INFO:hf-to-gguf:blk.8.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
214
+ INFO:hf-to-gguf:blk.8.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
215
+ INFO:hf-to-gguf:blk.9.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
216
+ INFO:hf-to-gguf:blk.9.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
217
+ INFO:hf-to-gguf:blk.9.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
218
+ INFO:hf-to-gguf:blk.9.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
219
+ INFO:hf-to-gguf:blk.9.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
220
+ INFO:hf-to-gguf:blk.9.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
221
+ INFO:hf-to-gguf:blk.9.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
222
+ INFO:hf-to-gguf:blk.9.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
223
+ INFO:hf-to-gguf:blk.9.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
224
+ INFO:hf-to-gguf:output_norm.weight, torch.bfloat16 --> F32, shape = {2048}
225
+ INFO:hf-to-gguf:Set meta model
226
+ INFO:hf-to-gguf:Set model parameters
227
+ INFO:hf-to-gguf:gguf: context length = 8192
228
+ INFO:hf-to-gguf:gguf: embedding length = 2048
229
+ INFO:hf-to-gguf:gguf: feed forward length = 5440
230
+ INFO:hf-to-gguf:gguf: head count = 16
231
+ INFO:hf-to-gguf:gguf: key-value head count = 16
232
+ INFO:hf-to-gguf:gguf: rope theta = 10000.0
233
+ INFO:hf-to-gguf:gguf: rms norm epsilon = 1e-05
234
+ INFO:hf-to-gguf:gguf: file type = 32
235
+ INFO:hf-to-gguf:Set model tokenizer
236
+ INFO:gguf.vocab:Setting special token type bos to 1
237
+ INFO:gguf.vocab:Setting special token type eos to 2
238
+ INFO:gguf.vocab:Setting special token type unk to 0
239
+ INFO:gguf.vocab:Setting add_bos_token to True
240
+ INFO:gguf.vocab:Setting add_eos_token to False
241
+ INFO:hf-to-gguf:Set model quantization version
242
+ INFO:gguf.gguf_writer:Writing the following files:
243
+ INFO:gguf.gguf_writer:salamandra-2b_bf16.gguf: n_tensors = 219, total_size = 4.5G
244
+ Writing: 100%|████████████████████████████████████████████████████████████████████| 4.51G/4.51G [00:10<00:00, 419Mbyte/s]
245
+ INFO:hf-to-gguf:Model successfully exported to salamandra-2b_bf16.gguf
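The reported total (n_tensors = 219, total_size = 4.5G) is consistent with the shapes listed in this log: 2 embedding/output matrices, 24 blocks of 9 tensors each, and `output_norm` give 219 tensors. A quick arithmetic check:

```python
# Recompute the bf16 GGUF size from the shapes logged above.
emb, vocab, ffn, layers = 2048, 256000, 5440, 24       # blk.0 .. blk.23

per_layer = 3 * ffn * emb + 4 * emb * emb              # ffn down/gate/up + attn q/k/v/output
norms = (2 * layers + 1) * emb                         # attn_norm + ffn_norm per layer, output_norm
params = 2 * vocab * emb + layers * per_layer + norms  # output + token_embd + blocks

print(f"{params:,} params")                # 2,253,490,176 -- the "2b" in salamandra-2b
print(f"~{params * 2 / 1e9:.2f} GB bf16")  # ~4.51 GB (norms are f32, adding only ~0.2 MB more)
```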
git_snapshot.txt ADDED
@@ -0,0 +1,3 @@
1
+ upstream: https://huggingface.co/BSC-LT/salamandra-2b
2
+ branch: origin/main
3
+ hash: f1f8713d7c0114f1f60fc274428cd158039e7425
model.safetensors → imatrix/oscar/imatrix-dataset.txt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ec596bbf8f95408a120b009f320621cdcd2f861b3e373e770fe5a12754de66b
3
- size 4507005744
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7830c495e88be1484f0fdd9a4f0b405cb88c0482283b0cf478238aabfcf2840
3
+ size 101131321
tokenizer.model → imatrix/oscar/imatrix.dat RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa490e57cebce5cb1a0a5b1a5d3fa4de05aee53dc3a44791f1c3401db44d802d
3
- size 4813274
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:194d585dd3bb27574dad0e5da861492ac47104929a182fcebbd88c41567a95e9
3
+ size 1707457
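The per-language files added below appear to be the shards behind `imatrix-dataset.txt`, the calibration text the quantization log reports as computed on 44176 chunks. A minimal sketch of how such a combined dataset could be assembled, assuming the file layout in this commit:

```python
from pathlib import Path

# Hypothetical assembly of the combined calibration text from the
# per-language shards; paths inferred from this commit's file layout.
lang_dir = Path("imatrix/oscar/langs")
out_path = Path("imatrix/oscar/imatrix-dataset.txt")

with out_path.open("w", encoding="utf-8") as out:
    for shard in sorted(lang_dir.glob("*.txt")):
        out.write(shard.read_text(encoding="utf-8"))
        out.write("\n")
```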
imatrix/oscar/langs/bg.txt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7db75e8d3ab29d086a9f95843710c8e26eb82492feaba8892d98f7cd42c958c5
3
+ size 15369727
imatrix/oscar/langs/ca.txt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:620a3fb2dadf2ad55e32248e1dc5455f7f27470a1e25893f0470eaf53b9e4364
3
+ size 2452735
imatrix/oscar/langs/cs.txt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bac9a98f322e5773b8e3e6abcd06ce243ad9ce1edf992db0ab5d9eea6e17490
3
+ size 6374140
imatrix/oscar/langs/cy.txt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41072c1798493fd14eaebd5ee9820d71ea803ee42b793f40248ab536ff5421c4
3
+ size 1711900
imatrix/oscar/langs/da.txt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e312b03a2cdf388224719c8e8b4b34911db629e77a818a6b272151c0746308a
3
+ size 2845693
imatrix/oscar/langs/de.txt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4c2008f2af94e4f62cca8dd03eb4e439b916b689b82545345369bec5c81a024
3
+ size 2820404
imatrix/oscar/langs/el.txt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fd2393f1b32910ca111e8a3d86a29b3c7ad5d2ca81ce756411c34f133759e1d
3
+ size 22016087
imatrix/oscar/langs/en.txt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af4f2a2b491f7a7c5eede69a5869762d7f2b4ef9f5e8960f3e34c96754f3e4b0
3
+ size 6204830
imatrix/oscar/langs/es.txt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71fbce9f6c2c8875a37c98bd8eed4144b64c91596214ff35676a3893d2d810ad
3
+ size 4007613
imatrix/oscar/langs/et.txt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e706f41af29936a9033192e73b11637bd0160c5d176beae051e9fbc2f8719d0
3
+ size 2366889
imatrix/oscar/langs/eu.txt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce9ef1dc787847b03b0d6af4bee5a097b38f55a9a3c3e9fc85727c1b50a2c964
3
+ size 793185
imatrix/oscar/langs/fi.txt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:652fc7fb857799b7e01382240f340324a116d1bd0d60f7ac78c2112b2b264581
3
+ size 4324807
imatrix/oscar/langs/fr.txt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75ff98d097421bc9d6dad95c84904fb25241123e6a397a47759dd5dba1b54246
3
+ size 3227190
imatrix/oscar/langs/ga.txt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21683a371855abef07d7f84b05a03617d60e969498eea2ba3b589f3997c2d6aa
3
+ size 1912591
imatrix/oscar/langs/gl.txt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7025affdb2538372f012f4213048ebaca72aa14fbbb14983d91f821aa5a42ef6
3
+ size 1375166
imatrix/oscar/langs/hr.txt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:838dce29fce8638d515bc64e3d33763a25c2f13a1ae5878e1412f23797345411
3
+ size 469023
imatrix/oscar/langs/hu.txt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d856286560de976671741744705c7ef227be2dd648bcf320fe63a0a6ef1aeea1
3
+ size 6021141
imatrix/oscar/langs/it.txt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:220bc92cd65fbd5b432a453debe102e1b0c947a492cec6d3ff5a5b87cf3c7eac
3
+ size 3893483
imatrix/oscar/langs/lt.txt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db1dff9403e72501447e46fb959b92740ca94afe57ada684e010e8dae3875559
3
+ size 3543428
imatrix/oscar/langs/lv.txt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0fa438a0385c7b8d5f6cdefdc7a025af13bd85916409c3cc0087d2589b91d2a
3
+ size 2837848
imatrix/oscar/langs/mt.txt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:097c73ca100485b1b6f7cdfee2e34ef6cf85bdf350cbf91e0328017adbcdab73
3
+ size 966065
imatrix/oscar/langs/nl.txt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:749ff8293fdf1552e4e1c309431f4305260676945882c721e4287c1e7608d6c7
3
+ size 3201009
imatrix/oscar/langs/nn.txt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:996c7311ab41ee25bee21d9125e8bb7fefcc3352ca4edaf2429b8e5ff0f3ad42
3
+ size 473060
imatrix/oscar/langs/no.txt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:018f84ce699d6ce8dcffc1f4ea9ce69ce29f60dff5c822039c1575f41e6f92fa
3
+ size 2004094
imatrix/oscar/langs/oc.txt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4f77cb54290063b6089120728e15468db7b212bf617c4678014503c866ede5c
3
+ size 672153
imatrix/oscar/langs/pl.txt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8d98cc00d727be32032107f9deae185aa2aac04d79a563b39069b50c288d09d
3
+ size 3187625
imatrix/oscar/langs/pt.txt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2d66c7bb0058423bf5bb97809c907bffa439e2ca48eac4a1fd87a0c2475c25c
3
+ size 3828558