update for quantization
imatrix
record of change
q5_k_m
bf16
iq2_xs
iq3_m
iq3_s
iq3_xs
iq3_xxs
iq4_nl
iq4_xs
q3_k_l
q3_k_m
q3_k_s
q4_k_m
q4_k_s
q5_k_s
q6_k
q8_0
removing safetensors
Temporarily remove LFS tracking for salamandra_header.png
Remove problematic salamandra_header.png from the repository
Fully remove salamandra_header.png from cache and LFS tracking
Track large files with Git LFS
Remove salamandra_header.png from LFS tracking
Add salamandra_header.png to LFS
update git attributes
Ensure all LFS-tracked model files are added
removing safetensors
removing duplicate data
removing duplicate data
lfs the imatrix
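
The logs below record each conversion but not the commands that produced them. Here is a minimal sketch of the workflow they imply, assuming llama.cpp build 3906 with its llama-quantize tool on PATH (the binary name and flag spelling are assumptions; only the logs themselves are part of this commit):

```bash
# Quantize the bf16 GGUF into each variant listed above, reusing the
# committed importance matrix; one log file per quantization type.
for TYPE in IQ2_XS IQ3_XXS IQ3_XS IQ3_S IQ3_M IQ4_NL IQ4_XS \
            Q3_K_S Q3_K_M Q3_K_L Q4_K_S Q4_K_M Q5_K_S Q5_K_M Q6_K Q8_0; do
  ./llama-quantize --imatrix imatrix/oscar/imatrix.dat \
    salamandra-2b_bf16.gguf "salamandra-2b_${TYPE}.gguf" "$TYPE" \
    > "${TYPE}_log.txt" 2>&1
done
```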
Note: this view is limited to 50 files because the commit contains too many changes, so the file list below is partial.
- .gitattributes +24 -1
- .gitignore +191 -0
- IQ2_XS_log.txt +339 -0
- IQ3_M_log.txt +339 -0
- IQ3_S_log.txt +339 -0
- IQ3_XS_log.txt +339 -0
- IQ3_XXS_log.txt +339 -0
- IQ4_NL_log.txt +266 -0
- IQ4_XS_log.txt +339 -0
- Q3_K_L_log.txt +339 -0
- Q3_K_M_log.txt +339 -0
- Q3_K_S_log.txt +339 -0
- Q4_K_M_log.txt +339 -0
- Q4_K_S_log.txt +339 -0
- Q5_K_M_log.txt +339 -0
- Q5_K_S_log.txt +339 -0
- Q6_K_log.txt +339 -0
- Q8_0_log.txt +266 -0
- README.md +54 -1
- bf16_log.txt +245 -0
- git_snapshot.txt +3 -0
- model.safetensors → imatrix/oscar/imatrix-dataset.txt +2 -2
- tokenizer.model → imatrix/oscar/imatrix.dat +2 -2
- imatrix/oscar/langs/bg.txt +3 -0
- imatrix/oscar/langs/ca.txt +3 -0
- imatrix/oscar/langs/cs.txt +3 -0
- imatrix/oscar/langs/cy.txt +3 -0
- imatrix/oscar/langs/da.txt +3 -0
- imatrix/oscar/langs/de.txt +3 -0
- imatrix/oscar/langs/el.txt +3 -0
- imatrix/oscar/langs/en.txt +3 -0
- imatrix/oscar/langs/es.txt +3 -0
- imatrix/oscar/langs/et.txt +3 -0
- imatrix/oscar/langs/eu.txt +3 -0
- imatrix/oscar/langs/fi.txt +3 -0
- imatrix/oscar/langs/fr.txt +3 -0
- imatrix/oscar/langs/ga.txt +3 -0
- imatrix/oscar/langs/gl.txt +3 -0
- imatrix/oscar/langs/hr.txt +3 -0
- imatrix/oscar/langs/hu.txt +3 -0
- imatrix/oscar/langs/it.txt +3 -0
- imatrix/oscar/langs/lt.txt +3 -0
- imatrix/oscar/langs/lv.txt +3 -0
- imatrix/oscar/langs/mt.txt +3 -0
- imatrix/oscar/langs/nl.txt +3 -0
- imatrix/oscar/langs/nn.txt +3 -0
- imatrix/oscar/langs/no.txt +3 -0
- imatrix/oscar/langs/oc.txt +3 -0
- imatrix/oscar/langs/pl.txt +3 -0
- imatrix/oscar/langs/pt.txt +3 -0
.gitattributes
CHANGED
@@ -33,5 +33,28 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+salamandra-2b_IQ4_XS.gguf filter=lfs diff=lfs merge=lfs -text
+salamandra-2b_Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+salamandra-2b_Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
+salamandra-2b_Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text
+salamandra-2b_IQ3_S.gguf filter=lfs diff=lfs merge=lfs -text
+salamandra-2b_Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text
+salamandra-2b_Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text
+salamandra-2b_Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+salamandra-2b_Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+salamandra-2b_Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
+salamandra-2b_IQ3_M.gguf filter=lfs diff=lfs merge=lfs -text
+salamandra-2b_IQ3_XS.gguf filter=lfs diff=lfs merge=lfs -text
+salamandra-2b_Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
+salamandra-2b_bf16.gguf filter=lfs diff=lfs merge=lfs -text
+salamandra-2b_IQ2_XS.gguf filter=lfs diff=lfs merge=lfs -text
+salamandra-2b_IQ3_XXS.gguf filter=lfs diff=lfs merge=lfs -text
+salamandra-2b_IQ4_NL.gguf filter=lfs diff=lfs merge=lfs -text
+imatrix/oscar/imatrix-dataset.txt filter=lfs diff=lfs merge=lfs -text
+imatrix/oscar/langs/bg.txt filter=lfs diff=lfs merge=lfs -text
+imatrix/oscar/langs/el.txt filter=lfs diff=lfs merge=lfs -text
+imatrix/oscar/langs/ru.txt filter=lfs diff=lfs merge=lfs -text
+imatrix/oscar/langs/uk.txt filter=lfs diff=lfs merge=lfs -text
+imatrix/oscar/langs/*.txt filter=lfs diff=lfs merge=lfs -text
 images/salamandra_header.png filter=lfs diff=lfs merge=lfs -text
-
+imatrix/oscar/imatrix.dat filter=lfs diff=lfs merge=lfs -text
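The per-file patterns added above are the lines `git lfs track` appends to .gitattributes. A sketch of the tracking commands that would produce them, assuming the standard git-lfs CLI (the actual invocations are not recorded in the commit; the one-pattern-per-file lines suggest one call per .gguf):

```bash
# Each invocation appends one pattern line to .gitattributes.
for f in salamandra-2b_*.gguf; do git lfs track "$f"; done
git lfs track "imatrix/oscar/imatrix-dataset.txt"
git lfs track "imatrix/oscar/langs/*.txt"
git lfs track "imatrix/oscar/imatrix.dat"
git add .gitattributes
```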
.gitignore
ADDED
@@ -0,0 +1,191 @@
+.aider*
+
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
IQ2_XS_log.txt
ADDED
@@ -0,0 +1,339 @@
+main: build = 3906 (7eee341b)
+main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
+main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_IQ2_XS.gguf' as IQ2_XS
+llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
+llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
+llama_model_loader: - kv 0: general.architecture str = llama
+llama_model_loader: - kv 1: general.type str = model
+llama_model_loader: - kv 2: general.size_label str = 2.3B
+llama_model_loader: - kv 3: general.license str = apache-2.0
+llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
+llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
+llama_model_loader: - kv 6: llama.block_count u32 = 24
+llama_model_loader: - kv 7: llama.context_length u32 = 8192
+llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
+llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
+llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
+llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
+llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
+llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
+llama_model_loader: - kv 14: general.file_type u32 = 32
+llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
+llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
+llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
+llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
+llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
+llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
+llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
+llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
+llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
+llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
+llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
+llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
+llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
+llama_model_loader: - kv 28: general.quantization_version u32 = 2
+llama_model_loader: - type f32: 49 tensors
+llama_model_loader: - type bf16: 170 tensors
+================================ Have weights data with 168 entries
+[ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
+[ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
+====== llama_model_quantize_internal: did not find weights for token_embd.weight
+converting to q2_K .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
+load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
+prepare_imatrix: have 168 importance matrix entries
+size = 1000.00 MiB -> 164.06 MiB
+[ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q2_K - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+[ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q2_K - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+[ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q2_K - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+[ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+[ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+[ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+[ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+[ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+[ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+[ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+[ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+[ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+[ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+[ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+[ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+[ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+[ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+[ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+[ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+[ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+[ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+[ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+[ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+[ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq2_xs - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq2_xs .. size = 21.25 MiB -> 3.07 MiB
+[ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_xs .. size = 8.00 MiB -> 1.16 MiB
+[ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q2_K .. size = 8.00 MiB -> 1.31 MiB
+[ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+llama_model_quantize_internal: model size = 4298.38 MB
+llama_model_quantize_internal: quant size = 1570.05 MB
+llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization
+
+main: quantize time = 33024.88 ms
+main: total time = 33024.88 ms
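The warning at the end of this log follows from the model's FFN width: 5440 is not a multiple of 256, the super-block size that q2_K and iq2_xs require, so every blk.*.ffn_down.weight tensor (one per layer, 24 in total) falls back to iq4_nl — exactly the "24 of 169" reported. Two quick checks against the log, using only numbers it already contains:

```bash
grep -c 'using fallback quantization' IQ2_XS_log.txt   # 24, matching the WARNING line
awk 'BEGIN { printf "%.2fx\n", 4298.38 / 1570.05 }'    # ~2.74x: bf16 model size vs. IQ2_XS quant size
```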
IQ3_M_log.txt
ADDED
@@ -0,0 +1,339 @@
+main: build = 3906 (7eee341b)
+main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
+main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_IQ3_M.gguf' as IQ3_M
+llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
+llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
+llama_model_loader: - kv 0: general.architecture str = llama
+llama_model_loader: - kv 1: general.type str = model
+llama_model_loader: - kv 2: general.size_label str = 2.3B
+llama_model_loader: - kv 3: general.license str = apache-2.0
+llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
+llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
+llama_model_loader: - kv 6: llama.block_count u32 = 24
+llama_model_loader: - kv 7: llama.context_length u32 = 8192
+llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
+llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
+llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
+llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
+llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
+llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
+llama_model_loader: - kv 14: general.file_type u32 = 32
+llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
+llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
+llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
+llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
+llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
+llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
+llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
+llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
+llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
+llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
+llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
+llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
+llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
+llama_model_loader: - kv 28: general.quantization_version u32 = 2
+llama_model_loader: - type f32: 49 tensors
+llama_model_loader: - type bf16: 170 tensors
+================================ Have weights data with 168 entries
+[ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
+[ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
+====== llama_model_quantize_internal: did not find weights for token_embd.weight
+converting to iq3_s .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
+load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
+prepare_imatrix: have 168 importance matrix entries
+size = 1000.00 MiB -> 214.84 MiB
+[ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+[ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+[ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+[ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+[ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+[ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+[ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+[ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+[ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+[ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+[ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+[ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+[ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+[ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+[ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
+converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
+[ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+[ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+[ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+[ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+[ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+[ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+[ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+[ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+[ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+[ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+[ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+[ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+[ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+[ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+[ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+[ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+[ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+[ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+[ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+[ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+[ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+[ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+[ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+[ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+[ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+[ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
+[ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+[ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+[ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
+[ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
+[ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
+
+llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
+converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
+[ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
135 |
+
[ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
136 |
+
[ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
137 |
+
[ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
138 |
+
[ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
139 |
+
[ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
140 |
+
[ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
141 |
+
[ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
142 |
+
[ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
143 |
+
|
144 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
|
145 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
146 |
+
[ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
147 |
+
[ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
148 |
+
[ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
149 |
+
[ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
150 |
+
[ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
151 |
+
[ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
152 |
+
[ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
153 |
+
[ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
154 |
+
[ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
155 |
+
|
156 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
|
157 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
158 |
+
[ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
159 |
+
[ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
160 |
+
[ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
161 |
+
[ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
162 |
+
[ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
163 |
+
[ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
164 |
+
[ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
165 |
+
[ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
166 |
+
[ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
167 |
+
|
168 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
|
169 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
170 |
+
[ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
171 |
+
[ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
172 |
+
[ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
173 |
+
[ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
174 |
+
[ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
175 |
+
[ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
176 |
+
[ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
177 |
+
[ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
178 |
+
[ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
179 |
+
|
180 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
|
181 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
182 |
+
[ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
183 |
+
[ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
184 |
+
[ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
185 |
+
[ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
186 |
+
[ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
187 |
+
[ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
188 |
+
[ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
189 |
+
[ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
190 |
+
[ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
191 |
+
|
192 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
|
193 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
194 |
+
[ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
195 |
+
[ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
196 |
+
[ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
197 |
+
[ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
198 |
+
[ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
199 |
+
[ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
200 |
+
[ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
201 |
+
[ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
202 |
+
[ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
203 |
+
|
204 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
|
205 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
206 |
+
[ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
207 |
+
[ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
208 |
+
[ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
209 |
+
[ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
210 |
+
[ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
211 |
+
[ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
212 |
+
[ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
213 |
+
[ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
214 |
+
[ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
215 |
+
|
216 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
|
217 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
218 |
+
[ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
219 |
+
[ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
220 |
+
[ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
221 |
+
[ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
222 |
+
[ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
223 |
+
[ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
224 |
+
[ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
225 |
+
[ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
226 |
+
[ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
227 |
+
|
228 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
|
229 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
230 |
+
[ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
231 |
+
[ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
232 |
+
[ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
233 |
+
[ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
234 |
+
[ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
235 |
+
[ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
236 |
+
[ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
237 |
+
[ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
238 |
+
[ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
239 |
+
|
240 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
|
241 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
242 |
+
[ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
243 |
+
[ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
244 |
+
[ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
245 |
+
[ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
246 |
+
[ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
247 |
+
[ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
248 |
+
[ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
249 |
+
[ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
250 |
+
[ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
251 |
+
|
252 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
|
253 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
254 |
+
[ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
255 |
+
[ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
256 |
+
[ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
257 |
+
[ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
258 |
+
[ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
259 |
+
[ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
260 |
+
[ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
261 |
+
[ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
262 |
+
[ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
263 |
+
|
264 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
|
265 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
266 |
+
[ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
267 |
+
[ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
268 |
+
[ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
269 |
+
[ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
270 |
+
[ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
271 |
+
[ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
272 |
+
[ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
273 |
+
[ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
274 |
+
[ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
275 |
+
|
276 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
|
277 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
278 |
+
[ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
279 |
+
[ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
280 |
+
[ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
281 |
+
[ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
282 |
+
[ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
283 |
+
[ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
284 |
+
[ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
285 |
+
[ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
286 |
+
[ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
287 |
+
|
288 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
|
289 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
290 |
+
[ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
291 |
+
[ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
292 |
+
[ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
293 |
+
[ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
294 |
+
[ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
295 |
+
[ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
296 |
+
[ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
297 |
+
[ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
298 |
+
[ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
299 |
+
|
300 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
|
301 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
302 |
+
[ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
303 |
+
[ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
304 |
+
[ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
305 |
+
[ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
306 |
+
[ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
307 |
+
[ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
308 |
+
[ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
309 |
+
[ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
310 |
+
[ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
311 |
+
|
312 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
|
313 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
314 |
+
[ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
315 |
+
[ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
316 |
+
[ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
317 |
+
[ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
318 |
+
[ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
319 |
+
[ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
320 |
+
[ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
321 |
+
[ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
322 |
+
[ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
323 |
+
|
324 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
|
325 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
326 |
+
[ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
327 |
+
[ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
|
328 |
+
[ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
329 |
+
[ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
330 |
+
[ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
331 |
+
[ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
332 |
+
[ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
|
333 |
+
[ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
334 |
+
llama_model_quantize_internal: model size = 4298.38 MB
|
335 |
+
llama_model_quantize_internal: quant size = 1772.29 MB
|
336 |
+
llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization
|
337 |
+
|
338 |
+
main: quantize time = 20053.13 ms
|
339 |
+
main: total time = 20053.13 ms
|
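A note on the repeated fallback warnings in the log above: llama.cpp's k-quants and i-quants pack weights into super-blocks of 256 elements, so a tensor can only use them when its row length is divisible by 256. Salamandra's feed-forward width of 5440 is not (5440 = 21 × 256 + 64), so every ffn_down tensor, whose rows run along that dimension, drops to a 32-element-block format (q5_0 or iq4_nl in these logs), while ffn_gate/ffn_up rows of length 2048 quantize as requested. A minimal sketch of that selection rule, modelled on the messages above rather than quoting the llama.cpp source:

```python
# Illustrative sketch of the fallback rule behind the "not divisible
# by 256" messages above; the real logic lives in llama.cpp's
# llama_tensor_get_type, this is only a model of the behaviour shown.
QK_K = 256  # super-block size shared by k-quants and i-quants
FALLBACKS = {"iq3_s": "iq4_nl", "q4_K": "q5_0"}  # pairs seen in these logs

def pick_quant(row_len: int, wanted: str) -> str:
    """Return the quant type actually used for rows of length row_len."""
    if row_len % QK_K != 0 and wanted in FALLBACKS:
        return FALLBACKS[wanted]  # 32-element-block types have no 256 requirement
    return wanted

assert pick_quant(2048, "iq3_s") == "iq3_s"   # ffn_gate/ffn_up: 2048 = 8 * 256
assert pick_quant(5440, "iq3_s") == "iq4_nl"  # ffn_down: 5440 = 21 * 256 + 64
```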
IQ3_S_log.txt
ADDED
@@ -0,0 +1,339 @@
main: build = 3906 (7eee341b)
main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_IQ3_S.gguf' as IQ3_S
llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = llama
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.size_label str = 2.3B
llama_model_loader: - kv 3: general.license str = apache-2.0
llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
llama_model_loader: - kv 6: llama.block_count u32 = 24
llama_model_loader: - kv 7: llama.context_length u32 = 8192
llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
llama_model_loader: - kv 14: general.file_type u32 = 32
llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
llama_model_loader: - kv 28: general.quantization_version u32 = 2
llama_model_loader: - type f32: 49 tensors
llama_model_loader: - type bf16: 170 tensors
================================ Have weights data with 168 entries
[ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
[ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
====== llama_model_quantize_internal: did not find weights for token_embd.weight
converting to iq3_s .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
prepare_imatrix: have 168 importance matrix entries
size = 1000.00 MiB -> 214.84 MiB
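The load_imatrix lines above show the importance matrix at work: 168 per-tensor activation statistics, computed over 44176 chunks of the multilingual OSCAR text shipped under imatrix/oscar/, are loaded before any tensor is converted. Conceptually, the imatrix turns plain round-to-nearest quantization into an importance-weighted fit that concentrates precision on the columns the calibration data actually activates. A toy sketch of that idea, not the actual llama.cpp kernels:

```python
# Toy sketch of importance-weighted quantization, assuming the usual
# weighted-MSE formulation; llama.cpp's real IQ3_S kernels are far more
# elaborate (grid codebooks, per-block scales, sign packing).
import numpy as np

def quantize_block(x: np.ndarray, w: np.ndarray, n_scales: int = 64) -> np.ndarray:
    """Pick the scale that minimizes the importance-weighted squared error."""
    best, best_err = x, np.inf
    for s in np.linspace(0.01, 0.2, n_scales):
        q = np.round(x / s)                        # integer codes at this scale
        err = float(np.sum(w * (x - s * q) ** 2))  # imatrix entries weight the error
        if err < best_err:
            best, best_err = s * q, err
    return best

x = np.random.randn(256).astype(np.float32)  # one 256-element super-block of weights
w = np.random.rand(256).astype(np.float32)   # per-column importance (mean squared activation)
x_hat = quantize_block(x, w)
```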
[ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
llama_model_quantize_internal: model size = 4298.38 MB
llama_model_quantize_internal: quant size = 1742.80 MB
llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization
|
337 |
+
|
338 |
+
main: quantize time = 21645.04 ms
|
339 |
+
main: total time = 21645.04 ms
|
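A note on the recurring fallback lines in these logs: every blk.*.ffn_down tensor has row length 5440 (llama.feed_forward_length), which is not a multiple of the 256-weight super-block that i-quants such as iq3_s and iq3_xxs pack into, so the quantizer falls back to iq4_nl, whose 32-weight blocks do divide 5440 evenly. A minimal Python sketch of that rule as it shows up in the logs (pick_quant is a hypothetical helper, not llama.cpp code):

# Sketch of the fallback rule reported by llama_tensor_get_type above;
# block widths are assumptions taken from the log messages.
SUPER_BLOCK = 256  # block width required by iq3_s / iq3_xxs
SMALL_BLOCK = 32   # block width of the iq4_nl fallback

def pick_quant(row_len: int, wanted: str = "iq3_s") -> str:
    if row_len % SUPER_BLOCK != 0:         # 5440 % 256 == 64, not divisible
        assert row_len % SMALL_BLOCK == 0  # 5440 % 32 == 0, so iq4_nl fits
        return "iq4_nl"
    return wanted

print(pick_quant(5440))  # iq4_nl -> the ffn_down tensors
print(pick_quant(2048))  # iq3_s  -> attention and ffn_gate/ffn_up tensors

That matches the summary line: the 24 fallback tensors are exactly one ffn_down per each of the model's 24 blocks. The load_imatrix lines also show these runs were guided by the importance matrix at imatrix/oscar/imatrix.dat, computed on 44176 chunks of the multilingual data under imatrix/oscar/.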
IQ3_XS_log.txt
ADDED
@@ -0,0 +1,339 @@
main: build = 3906 (7eee341b)
main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_IQ3_XS.gguf' as IQ3_XS
llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = llama
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.size_label str = 2.3B
llama_model_loader: - kv 3: general.license str = apache-2.0
llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
llama_model_loader: - kv 6: llama.block_count u32 = 24
llama_model_loader: - kv 7: llama.context_length u32 = 8192
llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
llama_model_loader: - kv 14: general.file_type u32 = 32
llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
llama_model_loader: - kv 28: general.quantization_version u32 = 2
llama_model_loader: - type f32: 49 tensors
llama_model_loader: - type bf16: 170 tensors
================================ Have weights data with 168 entries
[ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
[ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
====== llama_model_quantize_internal: did not find weights for token_embd.weight
converting to iq3_s .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
prepare_imatrix: have 168 importance matrix entries
size = 1000.00 MiB -> 214.84 MiB
[ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_s - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_s .. size = 21.25 MiB -> 4.57 MiB
[ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
llama_model_quantize_internal: model size = 4298.38 MB
llama_model_quantize_internal: quant size = 1715.88 MB
llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization
main: quantize time = 25070.00 ms
main: total time = 25070.00 ms
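The sizes these logs report can be sanity-checked by converting MiB back to bits per weight, which recovers each quant type's nominal width. A quick check, assuming llama.cpp's MiB figures mean 2**20 bytes:

# Recover bits-per-weight (bpw) from the tensor sizes reported above.
def bpw(mib: float, n_weights: int) -> float:
    return mib * 2**20 * 8 / n_weights

ffn = 2048 * 5440       # ffn_gate / ffn_up elements
attn = 2048 * 2048      # attention projection elements
print(bpw(21.25, ffn))  # 16.0  -> the bf16 source tensors
print(bpw(4.57, ffn))   # ~3.44 -> iq3_s
print(bpw(4.07, ffn))   # ~3.06 -> iq3_xxs
print(bpw(5.98, ffn))   # ~4.50 -> the iq4_nl fallback
print(bpw(1.53, attn))  # ~3.06 -> iq3_xxs attn_k / attn_q

At the file level, IQ3_XS lands at 1715.88 MB from the 4298.38 MB bf16 model, roughly 40% of the original and slightly below IQ3_S's 1742.80 MB, because IQ3_XS moves attn_k, attn_q and most ffn_gate/ffn_up tensors down from iq3_s to iq3_xxs.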
IQ3_XXS_log.txt
ADDED
@@ -0,0 +1,339 @@
main: build = 3906 (7eee341b)
main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_IQ3_XXS.gguf' as IQ3_XXS
llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = llama
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.size_label str = 2.3B
llama_model_loader: - kv 3: general.license str = apache-2.0
llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
llama_model_loader: - kv 6: llama.block_count u32 = 24
llama_model_loader: - kv 7: llama.context_length u32 = 8192
llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
llama_model_loader: - kv 14: general.file_type u32 = 32
llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
llama_model_loader: - kv 28: general.quantization_version u32 = 2
llama_model_loader: - type f32: 49 tensors
llama_model_loader: - type bf16: 170 tensors
================================ Have weights data with 168 entries
[ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
[ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
====== llama_model_quantize_internal: did not find weights for token_embd.weight
converting to iq3_s .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
prepare_imatrix: have 168 importance matrix entries
size = 1000.00 MiB -> 214.84 MiB
[ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
[ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
[ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
[ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
[ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
[ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
[ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
[ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
[ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
[ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
[ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
[ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
[ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
[ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
[ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
[ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
[ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
[ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
[ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
[ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
[ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
[ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
[ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
[ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
[ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
[ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
[ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
[ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
[ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
199 |
+
[ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
|
200 |
+
[ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
|
201 |
+
[ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
202 |
+
[ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
203 |
+
|
204 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
|
205 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
206 |
+
[ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
|
207 |
+
[ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
|
208 |
+
[ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
209 |
+
[ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
|
210 |
+
[ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
211 |
+
[ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
|
212 |
+
[ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
|
213 |
+
[ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
214 |
+
[ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
215 |
+
|
216 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
|
217 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
218 |
+
[ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
|
219 |
+
[ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
|
220 |
+
[ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
221 |
+
[ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
|
222 |
+
[ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
223 |
+
[ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
|
224 |
+
[ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
|
225 |
+
[ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
226 |
+
[ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
227 |
+
|
228 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
|
229 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
230 |
+
[ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
|
231 |
+
[ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
|
232 |
+
[ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
233 |
+
[ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
|
234 |
+
[ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
235 |
+
[ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
|
236 |
+
[ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
|
237 |
+
[ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
238 |
+
[ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
239 |
+
|
240 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
|
241 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
242 |
+
[ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
|
243 |
+
[ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
|
244 |
+
[ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
245 |
+
[ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
|
246 |
+
[ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
247 |
+
[ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
|
248 |
+
[ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
|
249 |
+
[ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
250 |
+
[ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
251 |
+
|
252 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
|
253 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
254 |
+
[ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
|
255 |
+
[ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
|
256 |
+
[ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
257 |
+
[ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
|
258 |
+
[ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
259 |
+
[ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
|
260 |
+
[ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
|
261 |
+
[ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
262 |
+
[ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
263 |
+
|
264 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
|
265 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
266 |
+
[ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
|
267 |
+
[ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
|
268 |
+
[ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
269 |
+
[ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
|
270 |
+
[ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
271 |
+
[ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
|
272 |
+
[ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
|
273 |
+
[ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
274 |
+
[ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
275 |
+
|
276 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
|
277 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
278 |
+
[ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
|
279 |
+
[ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
|
280 |
+
[ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
281 |
+
[ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
|
282 |
+
[ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
283 |
+
[ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
|
284 |
+
[ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
|
285 |
+
[ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
286 |
+
[ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
287 |
+
|
288 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
|
289 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
290 |
+
[ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
|
291 |
+
[ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
|
292 |
+
[ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
293 |
+
[ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
|
294 |
+
[ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
295 |
+
[ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
|
296 |
+
[ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
|
297 |
+
[ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
298 |
+
[ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
299 |
+
|
300 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
|
301 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
302 |
+
[ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
|
303 |
+
[ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
|
304 |
+
[ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
305 |
+
[ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
|
306 |
+
[ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
307 |
+
[ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
|
308 |
+
[ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
|
309 |
+
[ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
310 |
+
[ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
311 |
+
|
312 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
|
313 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
314 |
+
[ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
|
315 |
+
[ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
|
316 |
+
[ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
317 |
+
[ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
|
318 |
+
[ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
319 |
+
[ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
|
320 |
+
[ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
|
321 |
+
[ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
322 |
+
[ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
323 |
+
|
324 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq3_xxs - using fallback quantization iq4_nl
|
325 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
326 |
+
[ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
|
327 |
+
[ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq3_xxs .. size = 21.25 MiB -> 4.07 MiB
|
328 |
+
[ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
329 |
+
[ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
|
330 |
+
[ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_s .. size = 8.00 MiB -> 1.72 MiB
|
331 |
+
[ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq2_s .. size = 8.00 MiB -> 1.28 MiB
|
332 |
+
[ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq3_xxs .. size = 8.00 MiB -> 1.53 MiB
|
333 |
+
[ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
334 |
+
llama_model_quantize_internal: model size = 4298.38 MB
|
335 |
+
llama_model_quantize_internal: quant size = 1693.40 MB
|
336 |
+
llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization
|
337 |
+
|
338 |
+
main: quantize time = 28893.81 ms
|
339 |
+
main: total time = 28893.81 ms
|
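A note on the log above: the repeated fallback is purely a block-size constraint. iq3_xxs packs weights into 256-wide super-blocks, so the 5440-wide rows of each ffn_down tensor (5440 / 256 = 21.25) cannot use it and are re-routed to iq4_nl, whose 32-wide blocks divide 5440 evenly; the "24 of 169" in the summary lines up with the 24 per-layer ffn_down tensors. The Python sketch below reproduces the divisibility rule and the per-tensor sizes, assuming the usual GGUF bits-per-weight figures (iq3_xxs at 3.0625 bpw, iq4_nl at 4.5 bpw); the helper names are illustrative, not llama.cpp API.

```python
# Illustrative sketch (not llama.cpp source): the divisibility rule behind the
# "using fallback quantization" warnings, plus the per-tensor MiB figures.
# Assumed packing: iq3_xxs = 256-wide super-blocks at 3.0625 bits/weight,
# iq4_nl = 32-wide blocks at 4.5 bits/weight.

BPW = {"bf16": 16.0, "iq3_xxs": 3.0625, "iq4_nl": 4.5}
BLOCK = {"iq3_xxs": 256, "iq4_nl": 32}

def pick_type(row_width: int, wanted: str, fallback: str = "iq4_nl") -> str:
    """Fall back when the tensor's row width doesn't fill whole blocks."""
    return wanted if row_width % BLOCK[wanted] == 0 else fallback

def mib(rows: int, cols: int, qtype: str) -> float:
    """Tensor size in MiB at the given quantization type."""
    return rows * cols * BPW[qtype] / 8 / 2**20

# ffn_down is [5440, 2048]; 5440 % 256 == 64, hence the fallback to iq4_nl.
qtype = pick_type(5440, "iq3_xxs")
print(qtype, f"{mib(5440, 2048, 'bf16'):.2f} MiB -> {mib(5440, 2048, qtype):.2f} MiB")
# iq4_nl 21.25 MiB -> 5.98 MiB, matching the log.

# ffn_gate/ffn_up rows are 2048 wide (2048 % 256 == 0), so iq3_xxs sticks:
print(f"{mib(2048, 5440, 'iq3_xxs'):.2f} MiB")  # 4.07 MiB
```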
IQ4_NL_log.txt
ADDED
@@ -0,0 +1,266 @@
main: build = 3906 (7eee341b)
main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_IQ4_NL.gguf' as IQ4_NL
llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = llama
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.size_label str = 2.3B
llama_model_loader: - kv 3: general.license str = apache-2.0
llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
llama_model_loader: - kv 6: llama.block_count u32 = 24
llama_model_loader: - kv 7: llama.context_length u32 = 8192
llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
llama_model_loader: - kv 14: general.file_type u32 = 32
llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
llama_model_loader: - kv 28: general.quantization_version u32 = 2
llama_model_loader: - type f32: 49 tensors
llama_model_loader: - type bf16: 170 tensors
================================ Have weights data with 168 entries
[ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
[ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
====== llama_model_quantize_internal: did not find weights for token_embd.weight
converting to iq4_nl .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
prepare_imatrix: have 168 importance matrix entries
size = 1000.00 MiB -> 281.25 MiB
[ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_nl .. size = 8.00 MiB -> 2.25 MiB
[ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
llama_model_quantize_internal: model size = 4298.38 MB
llama_model_quantize_internal: quant size = 1927.95 MB

main: quantize time = 18024.91 ms
main: total time = 18024.91 ms
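Worth noting: unlike the IQ3 and IQ4_XS passes, this IQ4_NL run emits no fallback warnings, because iq4_nl's 32-wide blocks divide the 5440-wide ffn_down rows evenly (5440 % 32 == 0), so every tensor keeps the requested type and the model shrinks from 4298.38 MB to 1927.95 MB. For reference, the sketch below shows how an imatrix-guided pass like this is typically launched with the stock llama-quantize tool from a llama.cpp build; the paths are taken from the log, but the exact command used for this commit is not recorded, so treat this as an assumption.

```python
# Minimal sketch of launching an imatrix-guided quantization pass like the one
# logged above. Assumes the stock llama.cpp "llama-quantize" binary; the exact
# invocation used for this repo is not recorded in the log.
import subprocess

subprocess.run(
    [
        "./llama-quantize",
        "--imatrix", "imatrix/oscar/imatrix.dat",  # 168 entries over 44176 chunks, per the log
        "salamandra-2b_bf16.gguf",                 # bf16 source model
        "./salamandra-2b_IQ4_NL.gguf",             # quantized output
        "IQ4_NL",                                  # target quantization type
    ],
    check=True,
)
```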
IQ4_XS_log.txt
ADDED
@@ -0,0 +1,339 @@
1 |
+
main: build = 3906 (7eee341b)
|
2 |
+
main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
|
3 |
+
main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_IQ4_XS.gguf' as IQ4_XS
|
4 |
+
llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
|
5 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
6 |
+
llama_model_loader: - kv 0: general.architecture str = llama
|
7 |
+
llama_model_loader: - kv 1: general.type str = model
|
8 |
+
llama_model_loader: - kv 2: general.size_label str = 2.3B
|
9 |
+
llama_model_loader: - kv 3: general.license str = apache-2.0
|
10 |
+
llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
|
11 |
+
llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
|
12 |
+
llama_model_loader: - kv 6: llama.block_count u32 = 24
|
13 |
+
llama_model_loader: - kv 7: llama.context_length u32 = 8192
|
14 |
+
llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
|
15 |
+
llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
|
16 |
+
llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
|
17 |
+
llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
|
18 |
+
llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
|
19 |
+
llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
|
20 |
+
llama_model_loader: - kv 14: general.file_type u32 = 32
|
21 |
+
llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
|
22 |
+
llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
|
23 |
+
llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
|
24 |
+
llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
|
25 |
+
llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
|
26 |
+
llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
|
27 |
+
llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
|
28 |
+
llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
|
29 |
+
llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
|
30 |
+
llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
|
31 |
+
llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
|
32 |
+
llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
|
33 |
+
llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
|
34 |
+
llama_model_loader: - kv 28: general.quantization_version u32 = 2
|
35 |
+
llama_model_loader: - type f32: 49 tensors
|
36 |
+
llama_model_loader: - type bf16: 170 tensors
|
37 |
+
================================ Have weights data with 168 entries
|
38 |
+
[ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
|
39 |
+
[ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
|
40 |
+
====== llama_model_quantize_internal: did not find weights for token_embd.weight
|
41 |
+
converting to iq4_xs .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
|
42 |
+
load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
|
43 |
+
prepare_imatrix: have 168 importance matrix entries
|
44 |
+
size = 1000.00 MiB -> 265.62 MiB
|
45 |
+
[ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
46 |
+
[ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
47 |
+
|
48 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
|
49 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
50 |
+
[ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
|
51 |
+
[ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
|
52 |
+
[ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
53 |
+
[ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
|
54 |
+
[ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
|
55 |
+
[ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
|
56 |
+
[ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to iq4_xs .. size = 21.25 MiB -> 5.64 MiB
[ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to iq4_xs .. size = 8.00 MiB -> 2.12 MiB
[ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
llama_model_quantize_internal: model size = 4298.38 MB
llama_model_quantize_internal: quant size = 1884.38 MB
llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization

main: quantize time = 18604.79 ms
main: total time = 18604.79 ms
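
Aside on the two patterns in the IQ4_XS log above, added for context. iq4_xs packs weights in super-blocks of 256 values, so the 24 ffn_down tensors, whose rows are 5440 wide (5440 % 256 = 64), each fall back to the block-32 format iq4_nl; that accounts exactly for the "24 of 169 tensor(s) required fallback quantization" warning. The printed sizes likewise follow from bits per weight: bf16 = 16 bpw, iq4_xs = 4.25 bpw, iq4_nl = 4.5 bpw. The Python below is a minimal illustrative sketch of both rules under those assumptions, not llama.cpp's actual code; the helper names are invented for this example.

BPW = {"bf16": 16.0, "iq4_xs": 4.25, "iq4_nl": 4.5}  # bits per weight (assumed)

def pick_iq4_type(row_width: int) -> str:
    # iq4_xs needs the row width to be a multiple of 256; otherwise the
    # quantizer falls back to the block-32 format iq4_nl (see log above).
    return "iq4_xs" if row_width % 256 == 0 else "iq4_nl"

def tensor_mib(n_weights: int, qtype: str) -> float:
    # Approximate tensor size in MiB from bits per weight.
    return n_weights * BPW[qtype] / 8 / 2**20

assert pick_iq4_type(5440) == "iq4_nl"  # ffn_down rows: 5440 % 256 == 64
assert pick_iq4_type(2048) == "iq4_xs"  # attention rows: divisible by 256

n = 5440 * 2048                              # weights in one FFN tensor
print(f"{tensor_mib(n, 'bf16'):.2f} MiB")    # 21.25 MiB, as logged
print(f"{tensor_mib(n, 'iq4_xs'):.2f} MiB")  # 5.64 MiB (ffn_gate / ffn_up)
print(f"{tensor_mib(n, 'iq4_nl'):.2f} MiB")  # 5.98 MiB (ffn_down fallback)

The same divisibility rule explains the q5_K -> q5_1 fallback for ffn_down in the Q3_K_L log that follows.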
Q3_K_L_log.txt
ADDED
@@ -0,0 +1,339 @@
main: build = 3906 (7eee341b)
main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_Q3_K_L.gguf' as Q3_K_L
llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = llama
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.size_label str = 2.3B
llama_model_loader: - kv 3: general.license str = apache-2.0
llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
llama_model_loader: - kv 6: llama.block_count u32 = 24
llama_model_loader: - kv 7: llama.context_length u32 = 8192
llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
llama_model_loader: - kv 14: general.file_type u32 = 32
llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
llama_model_loader: - kv 28: general.quantization_version u32 = 2
llama_model_loader: - type f32: 49 tensors
llama_model_loader: - type bf16: 170 tensors
================================ Have weights data with 168 entries
[ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
[ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
====== llama_model_quantize_internal: did not find weights for token_embd.weight
converting to q3_K .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
prepare_imatrix: have 168 importance matrix entries
size = 1000.00 MiB -> 214.84 MiB
[ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
306 |
+
[ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
307 |
+
[ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
308 |
+
[ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
309 |
+
[ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
310 |
+
[ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
311 |
+
|
312 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
|
313 |
+
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
|
314 |
+
[ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
315 |
+
[ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
316 |
+
[ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
317 |
+
[ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
318 |
+
[ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
319 |
+
[ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
320 |
+
[ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
321 |
+
[ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
322 |
+
[ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
323 |
+
|
324 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
|
325 |
+
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
|
326 |
+
[ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
327 |
+
[ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
328 |
+
[ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
329 |
+
[ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
330 |
+
[ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
331 |
+
[ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
332 |
+
[ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
333 |
+
[ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
334 |
+
llama_model_quantize_internal: model size = 4298.38 MB
|
335 |
+
llama_model_quantize_internal: quant size = 1840.12 MB
|
336 |
+
llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization
|
337 |
+
|
338 |
+
main: quantize time = 6546.55 ms
|
339 |
+
main: total time = 6546.55 ms
|
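Note on the repeated fallback warnings above: k-quant formats such as q5_K pack weights in 256-element super-blocks, so a tensor's column count must be a multiple of 256. This model's FFN width of 5440 is not (5440 = 21 × 256 + 64), so every blk.*.ffn_down tensor drops to a legacy 32-element-block format instead (q5_1 in this Q5_K_M run), which is why the summary reports 24 fallbacks, one per layer. A minimal sketch of the rule, assuming the standard k-quant super-block size of 256:

```python
# Minimal sketch of the divisibility rule behind the fallback warnings above
# (illustration only, not llama.cpp source code).
QK_K = 256  # k-quant super-block size

def kquant_usable(n_cols: int) -> bool:
    """k-quants require the row length (number of columns) to be a multiple of QK_K."""
    return n_cols % QK_K == 0

print(kquant_usable(2048))  # True  -> 2048-wide attention tensors quantize normally
print(kquant_usable(5440))  # False -> ffn_down (5440 % 256 == 64) falls back to q5_1

# The logged sizes are consistent with q5_1's 6 bits/weight
# (32 weights in 24 bytes): 21.25 MiB * 6/16 ~= 7.97 MiB.
```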
Q3_K_M_log.txt
ADDED
@@ -0,0 +1,339 @@
main: build = 3906 (7eee341b)
main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_Q3_K_M.gguf' as Q3_K_M
llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = llama
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.size_label str = 2.3B
llama_model_loader: - kv 3: general.license str = apache-2.0
llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
llama_model_loader: - kv 6: llama.block_count u32 = 24
llama_model_loader: - kv 7: llama.context_length u32 = 8192
llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
llama_model_loader: - kv 14: general.file_type u32 = 32
llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
llama_model_loader: - kv 28: general.quantization_version u32 = 2
llama_model_loader: - type f32: 49 tensors
llama_model_loader: - type bf16: 170 tensors
================================ Have weights data with 168 entries
[ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
[ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
====== llama_model_quantize_internal: did not find weights for token_embd.weight
converting to q3_K .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
prepare_imatrix: have 168 importance matrix entries
size = 1000.00 MiB -> 214.84 MiB
[ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
llama_model_quantize_internal: model size = 4298.38 MB
llama_model_quantize_internal: quant size = 1801.84 MB
llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization

main: quantize time = 5383.58 ms
main: total time = 5383.58 ms
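A quick sanity check of the two summary blocks recorded so far, using only sizes that appear in these logs. Note that the per-model averages come out well above the nominal rates for these mixes, largely because the 2048 x 256000 output.weight (1000 MB) shows no size arrow in either log and so appears to be copied without quantization. The helper below is hypothetical, not part of llama.cpp:

```python
# Back-of-the-envelope check of the logged model/quant sizes: derive the
# average bits per weight, given the bf16 source spends 16 bits per weight.
def avg_bits_per_weight(quant_mb: float, source_mb: float, source_bits: int = 16) -> float:
    # Sizes scale linearly with bits per weight, so the ratio gives the average.
    return source_bits * quant_mb / source_mb

print(f"Q5_K_M: {avg_bits_per_weight(1840.12, 4298.38):.2f} bpw")  # ~6.85
print(f"Q3_K_M: {avg_bits_per_weight(1801.84, 4298.38):.2f} bpw")  # ~6.71
```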
Q3_K_S_log.txt
ADDED
@@ -0,0 +1,339 @@
main: build = 3906 (7eee341b)
main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_Q3_K_S.gguf' as Q3_K_S
llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = llama
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.size_label str = 2.3B
llama_model_loader: - kv 3: general.license str = apache-2.0
llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
llama_model_loader: - kv 6: llama.block_count u32 = 24
llama_model_loader: - kv 7: llama.context_length u32 = 8192
llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
llama_model_loader: - kv 14: general.file_type u32 = 32
llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
llama_model_loader: - kv 28: general.quantization_version u32 = 2
llama_model_loader: - type f32: 49 tensors
llama_model_loader: - type bf16: 170 tensors
================================ Have weights data with 168 entries
[ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
[ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
====== llama_model_quantize_internal: did not find weights for token_embd.weight
converting to q3_K .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
prepare_imatrix: have 168 importance matrix entries
size = 1000.00 MiB -> 214.84 MiB
[ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
[ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
[ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
[ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
174 |
+
[ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
175 |
+
[ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
176 |
+
[ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
177 |
+
[ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
178 |
+
[ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
179 |
+
|
180 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
|
181 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
182 |
+
[ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
183 |
+
[ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
184 |
+
[ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
185 |
+
[ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
186 |
+
[ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
187 |
+
[ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
188 |
+
[ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
189 |
+
[ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
190 |
+
[ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
191 |
+
|
192 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
|
193 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
194 |
+
[ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
195 |
+
[ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
196 |
+
[ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
197 |
+
[ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
198 |
+
[ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
199 |
+
[ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
200 |
+
[ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
201 |
+
[ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
202 |
+
[ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
203 |
+
|
204 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
|
205 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
206 |
+
[ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
207 |
+
[ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
208 |
+
[ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
209 |
+
[ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
210 |
+
[ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
211 |
+
[ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
212 |
+
[ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
213 |
+
[ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
214 |
+
[ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
215 |
+
|
216 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
|
217 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
218 |
+
[ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
219 |
+
[ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
220 |
+
[ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
221 |
+
[ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
222 |
+
[ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
223 |
+
[ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
224 |
+
[ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
225 |
+
[ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
226 |
+
[ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
227 |
+
|
228 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
|
229 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
230 |
+
[ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
231 |
+
[ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
232 |
+
[ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
233 |
+
[ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
234 |
+
[ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
235 |
+
[ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
236 |
+
[ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
237 |
+
[ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
238 |
+
[ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
239 |
+
|
240 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
|
241 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
242 |
+
[ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
243 |
+
[ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
244 |
+
[ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
245 |
+
[ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
246 |
+
[ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
247 |
+
[ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
248 |
+
[ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
249 |
+
[ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
250 |
+
[ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
251 |
+
|
252 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
|
253 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
254 |
+
[ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
255 |
+
[ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
256 |
+
[ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
257 |
+
[ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
258 |
+
[ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
259 |
+
[ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
260 |
+
[ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
261 |
+
[ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
262 |
+
[ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
263 |
+
|
264 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
|
265 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
266 |
+
[ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
267 |
+
[ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
268 |
+
[ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
269 |
+
[ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
270 |
+
[ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
271 |
+
[ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
272 |
+
[ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
273 |
+
[ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
274 |
+
[ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
275 |
+
|
276 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
|
277 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
278 |
+
[ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
279 |
+
[ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
280 |
+
[ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
281 |
+
[ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
282 |
+
[ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
283 |
+
[ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
284 |
+
[ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
285 |
+
[ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
286 |
+
[ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
287 |
+
|
288 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
|
289 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
290 |
+
[ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
291 |
+
[ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
292 |
+
[ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
293 |
+
[ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
294 |
+
[ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
295 |
+
[ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
296 |
+
[ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
297 |
+
[ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
298 |
+
[ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
299 |
+
|
300 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
|
301 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
302 |
+
[ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
303 |
+
[ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
304 |
+
[ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
305 |
+
[ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
306 |
+
[ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
307 |
+
[ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
308 |
+
[ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
309 |
+
[ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
310 |
+
[ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
311 |
+
|
312 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
|
313 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
314 |
+
[ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
315 |
+
[ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
316 |
+
[ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
317 |
+
[ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
318 |
+
[ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
319 |
+
[ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
320 |
+
[ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
321 |
+
[ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
322 |
+
[ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
323 |
+
|
324 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q3_K - using fallback quantization iq4_nl
|
325 |
+
converting to iq4_nl .. size = 21.25 MiB -> 5.98 MiB
|
326 |
+
[ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
327 |
+
[ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q3_K .. size = 21.25 MiB -> 4.57 MiB
|
328 |
+
[ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
329 |
+
[ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
330 |
+
[ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
331 |
+
[ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
332 |
+
[ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q3_K .. size = 8.00 MiB -> 1.72 MiB
|
333 |
+
[ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
334 |
+
llama_model_quantize_internal: model size = 4298.38 MB
|
335 |
+
llama_model_quantize_internal: quant size = 1742.80 MB
|
336 |
+
llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization
|
337 |
+
|
338 |
+
main: quantize time = 6724.72 ms
|
339 |
+
main: total time = 6724.72 ms
|
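Note on the recurring fallback messages above: k-quants pack weights in 256-wide super-blocks, so the requested k-quant can only be applied when a tensor's row length is a multiple of 256. The per-block `ffn_down` projections have rows of 5440 elements (5440 = 21 × 256 + 64), so all 24 of them are rerouted to a fallback type, which matches the closing `WARNING: 24 of 169 tensor(s) required fallback quantization`. The fallback types are formats of equal or higher precision, which is why the fallback `ffn_down` tensors come out larger (5.98 MiB at iq4_nl) than the neighbouring q3_K tensors (4.57 MiB). Below is a minimal Python sketch of that gating rule using the fallback pairs observed across the logs in this commit; the function and table names are illustrative, not llama.cpp's actual source (the real check lives in `llama_tensor_get_type`):

```python
# Sketch of the 256-divisibility rule behind the fallback messages above.
# QK_K mirrors ggml's k-quant super-block size; pick_type and FALLBACK are
# illustrative names, not the actual llama.cpp implementation.
QK_K = 256

# fallback pairs observed in the quantization logs of this commit
FALLBACK = {
    "q3_K": "iq4_nl",  # this log
    "q4_K": "q5_0",    # Q4_K_M log below
    "q6_K": "q8_0",    # Q4_K_M log below
}

def pick_type(requested: str, row_len: int) -> str:
    """Return the requested k-quant, or its fallback when the tensor's
    row length is not a multiple of the 256-wide super-block."""
    if row_len % QK_K != 0 and requested in FALLBACK:
        return FALLBACK[requested]
    return requested

# ffn_down rows are 5440 wide: 5440 % 256 == 64, so q3_K falls back to iq4_nl
assert pick_type("q3_K", 5440) == "iq4_nl"
# attention tensors are 2048 wide: 2048 % 256 == 0, so q3_K is used as-is
assert pick_type("q3_K", 2048) == "q3_K"
```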
Q4_K_M_log.txt
ADDED
@@ -0,0 +1,339 @@
main: build = 3906 (7eee341b)
main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_Q4_K_M.gguf' as Q4_K_M
llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = llama
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.size_label str = 2.3B
llama_model_loader: - kv 3: general.license str = apache-2.0
llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
llama_model_loader: - kv 6: llama.block_count u32 = 24
llama_model_loader: - kv 7: llama.context_length u32 = 8192
llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
llama_model_loader: - kv 14: general.file_type u32 = 32
llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
llama_model_loader: - kv 28: general.quantization_version u32 = 2
llama_model_loader: - type f32: 49 tensors
llama_model_loader: - type bf16: 170 tensors
================================ Have weights data with 168 entries
[ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
[ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
====== llama_model_quantize_internal: did not find weights for token_embd.weight
converting to q4_K .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
prepare_imatrix: have 168 importance matrix entries
size = 1000.00 MiB -> 281.25 MiB
[ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
[ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
[ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
[ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
[ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
[ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
[ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
[ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
[ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
[ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
[ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
[ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
[ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
llama_model_quantize_internal: model size = 4298.38 MB
llama_model_quantize_internal: quant size = 2020.01 MB
llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization

main: quantize time = 8902.98 ms
main: total time = 8902.98 ms
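The sizes printed in the Q4_K_M log are enough to sanity-check the effective bit widths: bf16 stores 16 bits per weight, so each tensor's compression ratio maps directly to bits per weight. A small arithmetic sketch (plain Python, no llama.cpp dependency; all values taken from the log above):

```python
# Effective bits per weight implied by the tensor sizes in the Q4_K_M log.
def bpw(bf16_mib: float, quant_mib: float) -> float:
    return 16.0 * quant_mib / bf16_mib

print(f"q4_K ffn_gate/up: {bpw(21.25, 5.98):.2f} bpw")      # ~4.50
print(f"q5_0 ffn_down:    {bpw(21.25, 7.30):.2f} bpw")      # ~5.50
print(f"q8_0 ffn_down:    {bpw(21.25, 11.29):.2f} bpw")     # ~8.50
print(f"q6_K attn_v:      {bpw(8.00, 3.28):.2f} bpw")       # ~6.56
print(f"whole model:      {bpw(4298.38, 2020.01):.2f} bpw") # ~7.52
```

The whole-model average lands well above q4_K's nominal ~4.5 bpw because `output.weight` (1000 MB, roughly a quarter of the model) is logged without a conversion step and so appears to have been left in bf16, while `token_embd.weight` was quantized to q4_K (1000.00 MiB -> 281.25 MiB) even though the importance matrix loaded from imatrix/oscar/imatrix.dat (168 entries computed on 44176 chunks) reports no entry for it.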
Q4_K_S_log.txt
ADDED
@@ -0,0 +1,339 @@
main: build = 3906 (7eee341b)
main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_Q4_K_S.gguf' as Q4_K_S
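For context, this log is the output of llama.cpp's quantization tool; a run like the one above would be launched roughly as follows (file paths taken from the log itself; the exact binary name and flag spelling depend on the llama.cpp checkout, so treat this as an illustrative sketch rather than the recorded command):

    ./llama-quantize --imatrix imatrix/oscar/imatrix.dat salamandra-2b_bf16.gguf ./salamandra-2b_Q4_K_S.gguf Q4_K_S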
llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = llama
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.size_label str = 2.3B
llama_model_loader: - kv 3: general.license str = apache-2.0
llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
llama_model_loader: - kv 6: llama.block_count u32 = 24
llama_model_loader: - kv 7: llama.context_length u32 = 8192
llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
llama_model_loader: - kv 14: general.file_type u32 = 32
llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
llama_model_loader: - kv 28: general.quantization_version u32 = 2
llama_model_loader: - type f32: 49 tensors
llama_model_loader: - type bf16: 170 tensors
================================ Have weights data with 168 entries
[ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
[ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
====== llama_model_quantize_internal: did not find weights for token_embd.weight
converting to q4_K .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
prepare_imatrix: have 168 importance matrix entries
size = 1000.00 MiB -> 281.25 MiB
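The importance matrix loaded above biases each tensor's quantization toward the weight columns that matter most on a calibration corpus. A file like imatrix/oscar/imatrix.dat is produced beforehand with llama.cpp's imatrix tool, along these lines (illustrative; the dataset path is the one named in the log, and the "44176 chunks" above is the number of text chunks the statistics were accumulated over):

    ./llama-imatrix -m salamandra-2b_bf16.gguf -f ./imatrix/oscar/imatrix-dataset.txt -o imatrix/oscar/imatrix.dat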
[ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 21.25 MiB -> 7.30 MiB
[ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q4_K .. size = 21.25 MiB -> 5.98 MiB
[ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB
[ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
llama_model_quantize_internal: model size = 4298.38 MB
llama_model_quantize_internal: quant size = 1963.81 MB
llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization

main: quantize time = 9350.38 ms
main: total time = 9350.38 ms
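The per-tensor sizes in these logs follow directly from each GGML type's bits per weight: bf16 stores 16 bits per weight, while the block layouts used here work out to 4.5 bits for q4_K (144 bytes per 256 weights), 5.5 for q5_K and q5_0, 6.0 for q5_1, 6.5625 for q6_K, and 8.5 for q8_0 (34 bytes per 32 weights). A small sketch that reproduces the logged numbers from those layouts (the block sizes are the standard GGML ones; the helper function is mine):

    # (bytes per block, weights per block) for the GGML types in these logs
    BLOCK = {
        "q4_K": (144, 256), "q5_K": (176, 256), "q6_K": (210, 256),
        "q8_0": (34, 32), "q5_1": (24, 32), "q5_0": (22, 32),
    }

    def quantized_mib(bf16_mib: float, qtype: str) -> float:
        bytes_per_block, weights_per_block = BLOCK[qtype]
        bits_per_weight = 8 * bytes_per_block / weights_per_block
        return bf16_mib * bits_per_weight / 16  # bf16 = 16 bits per weight

    print(quantized_mib(1000.0, "q4_K"))  # 281.25     -> token_embd.weight
    print(quantized_mib(21.25, "q5_0"))   # 7.3046875  -> ffn_down q5_0 fallback
    print(quantized_mib(21.25, "q8_0"))   # 11.2890625 -> ffn_down q8_0 fallback

The file-level ratio is consistent too: 1963.81 / 4298.38 is about 0.457, i.e. roughly 7.3 effective bits per weight for the Q4_K_S file; the output.weight line above shows no conversion, so it appears to stay at bf16 and pulls the average well above 4.5 bits.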
Q5_K_M_log.txt
ADDED
@@ -0,0 +1,339 @@
main: build = 3906 (7eee341b)
main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_Q5_K_M.gguf' as Q5_K_M
llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = llama
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.size_label str = 2.3B
llama_model_loader: - kv 3: general.license str = apache-2.0
llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
llama_model_loader: - kv 6: llama.block_count u32 = 24
llama_model_loader: - kv 7: llama.context_length u32 = 8192
llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
llama_model_loader: - kv 14: general.file_type u32 = 32
llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
llama_model_loader: - kv 28: general.quantization_version u32 = 2
llama_model_loader: - type f32: 49 tensors
llama_model_loader: - type bf16: 170 tensors
================================ Have weights data with 168 entries
[ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
[ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
====== llama_model_quantize_internal: did not find weights for token_embd.weight
converting to q5_K .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
prepare_imatrix: have 168 importance matrix entries
size = 1000.00 MiB -> 343.75 MiB
[ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
[ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
[ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
[ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
[ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
[ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
[ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
[ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
248 |
+
[ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
249 |
+
[ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
250 |
+
[ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
251 |
+
|
252 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
|
253 |
+
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
254 |
+
[ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
|
255 |
+
[ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
|
256 |
+
[ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
257 |
+
[ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
258 |
+
[ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
259 |
+
[ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
260 |
+
[ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
261 |
+
[ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
262 |
+
[ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
263 |
+
|
264 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
|
265 |
+
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
|
266 |
+
[ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
|
267 |
+
[ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
|
268 |
+
[ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
269 |
+
[ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
270 |
+
[ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
271 |
+
[ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
272 |
+
[ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
273 |
+
[ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
274 |
+
[ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
275 |
+
|
276 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
|
277 |
+
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
|
278 |
+
[ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
|
279 |
+
[ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
|
280 |
+
[ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
281 |
+
[ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
282 |
+
[ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
283 |
+
[ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
284 |
+
[ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
285 |
+
[ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
286 |
+
[ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
287 |
+
|
288 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
|
289 |
+
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
290 |
+
[ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
|
291 |
+
[ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
|
292 |
+
[ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
293 |
+
[ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
294 |
+
[ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
295 |
+
[ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
296 |
+
[ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
297 |
+
[ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
298 |
+
[ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
299 |
+
|
300 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
|
301 |
+
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
302 |
+
[ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
|
303 |
+
[ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
|
304 |
+
[ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
305 |
+
[ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
306 |
+
[ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
307 |
+
[ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
308 |
+
[ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
309 |
+
[ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
310 |
+
[ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
311 |
+
|
312 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
|
313 |
+
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
314 |
+
[ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
|
315 |
+
[ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
|
316 |
+
[ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
317 |
+
[ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
318 |
+
[ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
319 |
+
[ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
320 |
+
[ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
321 |
+
[ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
322 |
+
[ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
323 |
+
|
324 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
|
325 |
+
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
326 |
+
[ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
|
327 |
+
[ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
|
328 |
+
[ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
329 |
+
[ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
330 |
+
[ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
331 |
+
[ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
|
332 |
+
[ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
333 |
+
[ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
334 |
+
llama_model_quantize_internal: model size = 4298.38 MB
|
335 |
+
llama_model_quantize_internal: quant size = 2196.23 MB
|
336 |
+
llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization
|
337 |
+
|
338 |
+
main: quantize time = 9522.94 ms
|
339 |
+
main: total time = 9522.94 ms
|
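The repeated llama_tensor_get_type fallbacks in these logs are expected for this model: the k-quant formats (q5_K, q6_K) pack weights in 256-element super-blocks, and the FFN down-projection rows hold 5440 elements (5440 % 256 = 64), so llama.cpp substitutes a legacy format of equal or higher precision instead (q5_1 or q8_0). A minimal Python sketch of that rule, with the constant and the fallback pairs read off the logs rather than copied from the llama.cpp source:

    # Sketch of the k-quant divisibility rule behind the fallback messages.
    # QK_K and the fallback pairs are as observed in the logs above, not
    # taken from llama.cpp itself.
    QK_K = 256                                   # k-quant super-block size
    FALLBACK = {"q5_K": "q5_1", "q6_K": "q8_0"}  # pairs seen in these logs

    def effective_type(row_len: int, wanted: str) -> str:
        # A tensor can only use a k-quant if its row length is a
        # multiple of the super-block size.
        if wanted in FALLBACK and row_len % QK_K != 0:
            return FALLBACK[wanted]
        return wanted

    print(effective_type(5440, "q5_K"))  # q5_1 (ffn_down rows: 5440 % 256 != 0)
    print(effective_type(2048, "q5_K"))  # q5_K (attention rows divide evenly)

This also accounts for the "24 of 169 tensor(s) required fallback quantization" summaries: one ffn_down tensor per layer, across the model's 24 layers (llama.block_count = 24).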
Q5_K_S_log.txt
ADDED
@@ -0,0 +1,339 @@
main: build = 3906 (7eee341b)
main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_Q5_K_S.gguf' as Q5_K_S
llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = llama
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.size_label str = 2.3B
llama_model_loader: - kv 3: general.license str = apache-2.0
llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
llama_model_loader: - kv 6: llama.block_count u32 = 24
llama_model_loader: - kv 7: llama.context_length u32 = 8192
llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
llama_model_loader: - kv 14: general.file_type u32 = 32
llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
llama_model_loader: - kv 28: general.quantization_version u32 = 2
llama_model_loader: - type f32: 49 tensors
llama_model_loader: - type bf16: 170 tensors
================================ Have weights data with 168 entries
[ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
[ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
====== llama_model_quantize_internal: did not find weights for token_embd.weight
converting to q5_K .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
prepare_imatrix: have 168 importance matrix entries
size = 1000.00 MiB -> 343.75 MiB
[ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q5_K - using fallback quantization q5_1
converting to q5_1 .. size = 21.25 MiB -> 7.97 MiB
[ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q5_K .. size = 21.25 MiB -> 7.30 MiB
[ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB
[ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
llama_model_quantize_internal: model size = 4298.38 MB
llama_model_quantize_internal: quant size = 2150.01 MB
llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization

main: quantize time = 10361.94 ms
main: total time = 10361.94 ms
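The per-tensor sizes reported in these logs follow directly from each format's bits per weight (16 for bf16, 5.5 for q5_K, 6.5625 for q6_K, 6.0 for q5_1, 8.5 for q8_0). A short illustrative check of the figures above; the arithmetic is an added sanity check, not part of the logs:

    # Reproduce the per-tensor MiB figures in the logs from bits-per-weight.
    BPW = {"bf16": 16.0, "q5_K": 5.5, "q6_K": 6.5625, "q5_1": 6.0, "q8_0": 8.5}

    def size_mib(n_elements: int, fmt: str) -> float:
        # bits -> bytes -> MiB
        return n_elements * BPW[fmt] / 8 / 1024**2

    ffn = 5440 * 2048    # ffn_down / ffn_gate / ffn_up element count
    attn = 2048 * 2048   # attention projection element count

    print(f"{size_mib(ffn, 'bf16'):.2f} MiB")   # 21.25, bf16 source tensor
    print(f"{size_mib(ffn, 'q5_1'):.2f} MiB")   # 7.97, ffn_down fallback in Q5_K_S
    print(f"{size_mib(ffn, 'q8_0'):.2f} MiB")   # 11.29, ffn_down fallback in Q6_K
    print(f"{size_mib(attn, 'q6_K'):.2f} MiB")  # 3.28, attention tensors in Q6_K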
Q6_K_log.txt
ADDED
@@ -0,0 +1,339 @@
main: build = 3906 (7eee341b)
main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_Q6_K.gguf' as Q6_K
llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = llama
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.size_label str = 2.3B
llama_model_loader: - kv 3: general.license str = apache-2.0
llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
llama_model_loader: - kv 6: llama.block_count u32 = 24
llama_model_loader: - kv 7: llama.context_length u32 = 8192
llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
llama_model_loader: - kv 14: general.file_type u32 = 32
llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
llama_model_loader: - kv 28: general.quantization_version u32 = 2
llama_model_loader: - type f32: 49 tensors
llama_model_loader: - type bf16: 170 tensors
================================ Have weights data with 168 entries
[ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
[ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
====== llama_model_quantize_internal: did not find weights for token_embd.weight
converting to q6_K .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
prepare_imatrix: have 168 importance matrix entries
size = 1000.00 MiB -> 410.16 MiB
[ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
[ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
[ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
[ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
[ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
[ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
[ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
[ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
[ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
[ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
[ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
[ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
[ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
[ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
[ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
[ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,

llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
[ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
[ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
[ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
[ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
[ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
116 |
+
[ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
117 |
+
[ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
118 |
+
[ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
119 |
+
|
120 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
|
121 |
+
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
122 |
+
[ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
123 |
+
[ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
124 |
+
[ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
125 |
+
[ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
126 |
+
[ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
127 |
+
[ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
128 |
+
[ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
129 |
+
[ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
130 |
+
[ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
131 |
+
|
132 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
|
133 |
+
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
134 |
+
[ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
135 |
+
[ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
136 |
+
[ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
137 |
+
[ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
138 |
+
[ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
139 |
+
[ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
140 |
+
[ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
141 |
+
[ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
142 |
+
[ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
143 |
+
|
144 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
|
145 |
+
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
146 |
+
[ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
147 |
+
[ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
148 |
+
[ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
149 |
+
[ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
150 |
+
[ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
151 |
+
[ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
152 |
+
[ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
153 |
+
[ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
154 |
+
[ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
155 |
+
|
156 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
|
157 |
+
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
158 |
+
[ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
159 |
+
[ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
160 |
+
[ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
161 |
+
[ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
162 |
+
[ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
163 |
+
[ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
164 |
+
[ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
165 |
+
[ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
166 |
+
[ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
167 |
+
|
168 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
|
169 |
+
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
170 |
+
[ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
171 |
+
[ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
172 |
+
[ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
173 |
+
[ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
174 |
+
[ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
175 |
+
[ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
176 |
+
[ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
177 |
+
[ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
178 |
+
[ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
179 |
+
|
180 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
|
181 |
+
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
182 |
+
[ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
183 |
+
[ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
184 |
+
[ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
185 |
+
[ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
186 |
+
[ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
187 |
+
[ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
188 |
+
[ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
189 |
+
[ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
190 |
+
[ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
191 |
+
|
192 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
|
193 |
+
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
194 |
+
[ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
195 |
+
[ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
196 |
+
[ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
197 |
+
[ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
198 |
+
[ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
199 |
+
[ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
200 |
+
[ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
201 |
+
[ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
202 |
+
[ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
203 |
+
|
204 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
|
205 |
+
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
206 |
+
[ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
207 |
+
[ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
208 |
+
[ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
209 |
+
[ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
210 |
+
[ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
211 |
+
[ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
212 |
+
[ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
213 |
+
[ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
214 |
+
[ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
215 |
+
|
216 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
|
217 |
+
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
218 |
+
[ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
219 |
+
[ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
220 |
+
[ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
221 |
+
[ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
222 |
+
[ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
223 |
+
[ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
224 |
+
[ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
225 |
+
[ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
226 |
+
[ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
227 |
+
|
228 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
|
229 |
+
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
230 |
+
[ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
231 |
+
[ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
232 |
+
[ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
233 |
+
[ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
234 |
+
[ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
235 |
+
[ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
236 |
+
[ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
237 |
+
[ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
238 |
+
[ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
239 |
+
|
240 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
|
241 |
+
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
242 |
+
[ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
243 |
+
[ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
244 |
+
[ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
245 |
+
[ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
246 |
+
[ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
247 |
+
[ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
248 |
+
[ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
249 |
+
[ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
250 |
+
[ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
251 |
+
|
252 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
|
253 |
+
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
254 |
+
[ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
255 |
+
[ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
256 |
+
[ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
257 |
+
[ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
258 |
+
[ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
259 |
+
[ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
260 |
+
[ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
261 |
+
[ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
262 |
+
[ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
263 |
+
|
264 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
|
265 |
+
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
266 |
+
[ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
267 |
+
[ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
268 |
+
[ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
269 |
+
[ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
270 |
+
[ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
271 |
+
[ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
272 |
+
[ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
273 |
+
[ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
274 |
+
[ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
275 |
+
|
276 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
|
277 |
+
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
278 |
+
[ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
279 |
+
[ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
280 |
+
[ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
281 |
+
[ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
282 |
+
[ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
283 |
+
[ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
284 |
+
[ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
285 |
+
[ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
286 |
+
[ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
287 |
+
|
288 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
|
289 |
+
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
290 |
+
[ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
291 |
+
[ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
292 |
+
[ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
293 |
+
[ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
294 |
+
[ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
295 |
+
[ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
296 |
+
[ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
297 |
+
[ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
298 |
+
[ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
299 |
+
|
300 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
|
301 |
+
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
302 |
+
[ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
303 |
+
[ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
304 |
+
[ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
305 |
+
[ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
306 |
+
[ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
307 |
+
[ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
308 |
+
[ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
309 |
+
[ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
310 |
+
[ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
311 |
+
|
312 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
|
313 |
+
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
314 |
+
[ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
315 |
+
[ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
316 |
+
[ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
317 |
+
[ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
318 |
+
[ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
319 |
+
[ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
320 |
+
[ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
321 |
+
[ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
322 |
+
[ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16,
|
323 |
+
|
324 |
+
llama_tensor_get_type : tensor cols 5440 x 2048 are not divisible by 256, required for q6_K - using fallback quantization q8_0
|
325 |
+
converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
326 |
+
[ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
327 |
+
[ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q6_K .. size = 21.25 MiB -> 8.72 MiB
|
328 |
+
[ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
329 |
+
[ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
330 |
+
[ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
331 |
+
[ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
332 |
+
[ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB
|
333 |
+
[ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
334 |
+
llama_model_quantize_internal: model size = 4298.38 MB
|
335 |
+
llama_model_quantize_internal: quant size = 2414.84 MB
|
336 |
+
llama_model_quantize_internal: WARNING: 24 of 169 tensor(s) required fallback quantization
|
337 |
+
|
338 |
+
main: quantize time = 4934.86 ms
|
339 |
+
main: total time = 4934.86 ms
|
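Why the fallbacks above happen: k-quants such as q6_K pack weights into 256-element super-blocks, so a tensor's row length (its first GGUF dimension) must be a multiple of 256. This model's FFN down-projections are 5440 wide (5440 = 21 × 256 + 64), so all 24 blk.*.ffn_down.weight tensors drop back to q8_0 — exactly the 24 fallbacks counted in the WARNING line. A minimal sketch of the divisibility rule (the helper below is illustrative, not llama.cpp's actual API):

```python
QK_K = 256  # super-block size used by k-quants (q6_K, q5_K, ...) in llama.cpp

def k_quant_ok(ne0: int, block: int = QK_K) -> bool:
    # ne0 is the tensor's first dimension (elements per row in GGUF);
    # k-quants need a whole number of super-blocks per row.
    return ne0 % block == 0

print(k_quant_ok(5440))  # False: 5440 = 21 * 256 + 64 -> ffn_down falls back to q8_0
print(k_quant_ok(2048))  # True:  attention weights quantize to q6_K as requested
```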
Q8_0_log.txt
ADDED
@@ -0,0 +1,266 @@
1 |
+
main: build = 3906 (7eee341b)
|
2 |
+
main: built with Apple clang version 15.0.0 (clang-1500.3.9.4) for arm64-apple-darwin23.6.0
|
3 |
+
main: quantizing 'salamandra-2b_bf16.gguf' to './salamandra-2b_Q8_0.gguf' as Q8_0
|
4 |
+
llama_model_loader: loaded meta data with 29 key-value pairs and 219 tensors from salamandra-2b_bf16.gguf (version GGUF V3 (latest))
|
5 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
6 |
+
llama_model_loader: - kv 0: general.architecture str = llama
|
7 |
+
llama_model_loader: - kv 1: general.type str = model
|
8 |
+
llama_model_loader: - kv 2: general.size_label str = 2.3B
|
9 |
+
llama_model_loader: - kv 3: general.license str = apache-2.0
|
10 |
+
llama_model_loader: - kv 4: general.tags arr[str,1] = ["text-generation"]
|
11 |
+
llama_model_loader: - kv 5: general.languages arr[str,36] = ["bg", "ca", "code", "cs", "cy", "da"...
|
12 |
+
llama_model_loader: - kv 6: llama.block_count u32 = 24
|
13 |
+
llama_model_loader: - kv 7: llama.context_length u32 = 8192
|
14 |
+
llama_model_loader: - kv 8: llama.embedding_length u32 = 2048
|
15 |
+
llama_model_loader: - kv 9: llama.feed_forward_length u32 = 5440
|
16 |
+
llama_model_loader: - kv 10: llama.attention.head_count u32 = 16
|
17 |
+
llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 16
|
18 |
+
llama_model_loader: - kv 12: llama.rope.freq_base f32 = 10000.000000
|
19 |
+
llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
|
20 |
+
llama_model_loader: - kv 14: general.file_type u32 = 32
|
21 |
+
llama_model_loader: - kv 15: llama.vocab_size u32 = 256000
|
22 |
+
llama_model_loader: - kv 16: llama.rope.dimension_count u32 = 128
|
23 |
+
llama_model_loader: - kv 17: tokenizer.ggml.add_space_prefix bool = true
|
24 |
+
llama_model_loader: - kv 18: tokenizer.ggml.model str = llama
|
25 |
+
llama_model_loader: - kv 19: tokenizer.ggml.pre str = default
|
26 |
+
llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,256000] = ["<unk>", "<s>", "</s>", "<pad>", "<|...
|
27 |
+
llama_model_loader: - kv 21: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...
|
28 |
+
llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
|
29 |
+
llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 1
|
30 |
+
llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
|
31 |
+
llama_model_loader: - kv 25: tokenizer.ggml.unknown_token_id u32 = 0
|
32 |
+
llama_model_loader: - kv 26: tokenizer.ggml.add_bos_token bool = true
|
33 |
+
llama_model_loader: - kv 27: tokenizer.ggml.add_eos_token bool = false
|
34 |
+
llama_model_loader: - kv 28: general.quantization_version u32 = 2
|
35 |
+
llama_model_loader: - type f32: 49 tensors
|
36 |
+
llama_model_loader: - type bf16: 170 tensors
|
37 |
+
================================ Have weights data with 168 entries
|
38 |
+
[ 1/ 219] output.weight - [ 2048, 256000, 1, 1], type = bf16, size = 1000.000 MB
|
39 |
+
[ 2/ 219] token_embd.weight - [ 2048, 256000, 1, 1], type = bf16,
|
40 |
+
====== llama_model_quantize_internal: did not find weights for token_embd.weight
|
41 |
+
converting to q8_0 .. load_imatrix: imatrix dataset='./imatrix/oscar/imatrix-dataset.txt'
|
42 |
+
load_imatrix: loaded 168 importance matrix entries from imatrix/oscar/imatrix.dat computed on 44176 chunks
|
43 |
+
prepare_imatrix: have 168 importance matrix entries
|
44 |
+
size = 1000.00 MiB -> 531.25 MiB
|
45 |
+
[ 3/ 219] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
46 |
+
[ 4/ 219] blk.0.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
47 |
+
[ 5/ 219] blk.0.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
48 |
+
[ 6/ 219] blk.0.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
49 |
+
[ 7/ 219] blk.0.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
50 |
+
[ 8/ 219] blk.0.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
51 |
+
[ 9/ 219] blk.0.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
52 |
+
[ 10/ 219] blk.0.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
53 |
+
[ 11/ 219] blk.0.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
54 |
+
[ 12/ 219] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
55 |
+
[ 13/ 219] blk.1.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
56 |
+
[ 14/ 219] blk.1.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
57 |
+
[ 15/ 219] blk.1.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
58 |
+
[ 16/ 219] blk.1.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
59 |
+
[ 17/ 219] blk.1.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
60 |
+
[ 18/ 219] blk.1.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
61 |
+
[ 19/ 219] blk.1.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
62 |
+
[ 20/ 219] blk.1.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
63 |
+
[ 21/ 219] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
64 |
+
[ 22/ 219] blk.10.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
65 |
+
[ 23/ 219] blk.10.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
66 |
+
[ 24/ 219] blk.10.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
67 |
+
[ 25/ 219] blk.10.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
68 |
+
[ 26/ 219] blk.10.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
69 |
+
[ 27/ 219] blk.10.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
70 |
+
[ 28/ 219] blk.10.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
71 |
+
[ 29/ 219] blk.10.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
72 |
+
[ 30/ 219] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
73 |
+
[ 31/ 219] blk.11.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
74 |
+
[ 32/ 219] blk.11.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
75 |
+
[ 33/ 219] blk.11.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
76 |
+
[ 34/ 219] blk.11.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
77 |
+
[ 35/ 219] blk.11.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
78 |
+
[ 36/ 219] blk.11.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
79 |
+
[ 37/ 219] blk.11.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
80 |
+
[ 38/ 219] blk.11.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
81 |
+
[ 39/ 219] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
82 |
+
[ 40/ 219] blk.12.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
83 |
+
[ 41/ 219] blk.12.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
84 |
+
[ 42/ 219] blk.12.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
85 |
+
[ 43/ 219] blk.12.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
86 |
+
[ 44/ 219] blk.12.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
87 |
+
[ 45/ 219] blk.12.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
88 |
+
[ 46/ 219] blk.12.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
89 |
+
[ 47/ 219] blk.12.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
90 |
+
[ 48/ 219] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
91 |
+
[ 49/ 219] blk.13.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
92 |
+
[ 50/ 219] blk.13.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
93 |
+
[ 51/ 219] blk.13.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
94 |
+
[ 52/ 219] blk.13.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
95 |
+
[ 53/ 219] blk.13.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
96 |
+
[ 54/ 219] blk.13.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
97 |
+
[ 55/ 219] blk.13.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
98 |
+
[ 56/ 219] blk.13.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
99 |
+
[ 57/ 219] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
100 |
+
[ 58/ 219] blk.14.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
101 |
+
[ 59/ 219] blk.14.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
102 |
+
[ 60/ 219] blk.14.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
103 |
+
[ 61/ 219] blk.14.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
104 |
+
[ 62/ 219] blk.14.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
105 |
+
[ 63/ 219] blk.14.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
106 |
+
[ 64/ 219] blk.14.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
107 |
+
[ 65/ 219] blk.14.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
108 |
+
[ 66/ 219] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
109 |
+
[ 67/ 219] blk.15.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
110 |
+
[ 68/ 219] blk.15.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
111 |
+
[ 69/ 219] blk.15.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
112 |
+
[ 70/ 219] blk.15.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
113 |
+
[ 71/ 219] blk.15.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
114 |
+
[ 72/ 219] blk.15.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
115 |
+
[ 73/ 219] blk.15.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
116 |
+
[ 74/ 219] blk.15.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
117 |
+
[ 75/ 219] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
118 |
+
[ 76/ 219] blk.16.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
119 |
+
[ 77/ 219] blk.16.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
120 |
+
[ 78/ 219] blk.16.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
121 |
+
[ 79/ 219] blk.16.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
122 |
+
[ 80/ 219] blk.16.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
123 |
+
[ 81/ 219] blk.16.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
124 |
+
[ 82/ 219] blk.16.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
125 |
+
[ 83/ 219] blk.16.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
126 |
+
[ 84/ 219] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
127 |
+
[ 85/ 219] blk.17.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
128 |
+
[ 86/ 219] blk.17.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
129 |
+
[ 87/ 219] blk.17.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
130 |
+
[ 88/ 219] blk.17.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
131 |
+
[ 89/ 219] blk.17.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
132 |
+
[ 90/ 219] blk.17.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
133 |
+
[ 91/ 219] blk.17.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
134 |
+
[ 92/ 219] blk.17.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
135 |
+
[ 93/ 219] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
136 |
+
[ 94/ 219] blk.18.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
137 |
+
[ 95/ 219] blk.18.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
138 |
+
[ 96/ 219] blk.18.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
139 |
+
[ 97/ 219] blk.18.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
140 |
+
[ 98/ 219] blk.18.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
141 |
+
[ 99/ 219] blk.18.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
142 |
+
[ 100/ 219] blk.18.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
143 |
+
[ 101/ 219] blk.18.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
144 |
+
[ 102/ 219] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
145 |
+
[ 103/ 219] blk.19.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
146 |
+
[ 104/ 219] blk.19.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
147 |
+
[ 105/ 219] blk.19.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
148 |
+
[ 106/ 219] blk.19.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
149 |
+
[ 107/ 219] blk.19.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
150 |
+
[ 108/ 219] blk.19.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
151 |
+
[ 109/ 219] blk.19.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
152 |
+
[ 110/ 219] blk.19.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
153 |
+
[ 111/ 219] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
154 |
+
[ 112/ 219] blk.2.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
155 |
+
[ 113/ 219] blk.2.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
156 |
+
[ 114/ 219] blk.2.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
157 |
+
[ 115/ 219] blk.2.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
158 |
+
[ 116/ 219] blk.2.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
159 |
+
[ 117/ 219] blk.2.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
160 |
+
[ 118/ 219] blk.2.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
161 |
+
[ 119/ 219] blk.2.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
162 |
+
[ 120/ 219] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
163 |
+
[ 121/ 219] blk.20.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
164 |
+
[ 122/ 219] blk.20.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
165 |
+
[ 123/ 219] blk.20.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
166 |
+
[ 124/ 219] blk.20.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
167 |
+
[ 125/ 219] blk.20.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
168 |
+
[ 126/ 219] blk.20.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
169 |
+
[ 127/ 219] blk.20.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
170 |
+
[ 128/ 219] blk.20.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
171 |
+
[ 129/ 219] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
172 |
+
[ 130/ 219] blk.21.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
173 |
+
[ 131/ 219] blk.21.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
174 |
+
[ 132/ 219] blk.21.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
175 |
+
[ 133/ 219] blk.21.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
176 |
+
[ 134/ 219] blk.21.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
177 |
+
[ 135/ 219] blk.21.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
178 |
+
[ 136/ 219] blk.21.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
179 |
+
[ 137/ 219] blk.21.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
180 |
+
[ 138/ 219] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
181 |
+
[ 139/ 219] blk.22.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
182 |
+
[ 140/ 219] blk.22.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
183 |
+
[ 141/ 219] blk.22.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
184 |
+
[ 142/ 219] blk.22.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
185 |
+
[ 143/ 219] blk.22.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
186 |
+
[ 144/ 219] blk.22.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
187 |
+
[ 145/ 219] blk.22.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
188 |
+
[ 146/ 219] blk.22.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
189 |
+
[ 147/ 219] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
190 |
+
[ 148/ 219] blk.23.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
191 |
+
[ 149/ 219] blk.23.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
192 |
+
[ 150/ 219] blk.23.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
193 |
+
[ 151/ 219] blk.23.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
194 |
+
[ 152/ 219] blk.23.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
195 |
+
[ 153/ 219] blk.23.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
196 |
+
[ 154/ 219] blk.23.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
197 |
+
[ 155/ 219] blk.23.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
198 |
+
[ 156/ 219] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
199 |
+
[ 157/ 219] blk.3.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
200 |
+
[ 158/ 219] blk.3.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
201 |
+
[ 159/ 219] blk.3.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
202 |
+
[ 160/ 219] blk.3.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
203 |
+
[ 161/ 219] blk.3.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
204 |
+
[ 162/ 219] blk.3.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
205 |
+
[ 163/ 219] blk.3.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
206 |
+
[ 164/ 219] blk.3.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
207 |
+
[ 165/ 219] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
208 |
+
[ 166/ 219] blk.4.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
209 |
+
[ 167/ 219] blk.4.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
210 |
+
[ 168/ 219] blk.4.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
211 |
+
[ 169/ 219] blk.4.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
212 |
+
[ 170/ 219] blk.4.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
213 |
+
[ 171/ 219] blk.4.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
214 |
+
[ 172/ 219] blk.4.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
215 |
+
[ 173/ 219] blk.4.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
216 |
+
[ 174/ 219] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
217 |
+
[ 175/ 219] blk.5.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
218 |
+
[ 176/ 219] blk.5.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
219 |
+
[ 177/ 219] blk.5.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
|
220 |
+
[ 178/ 219] blk.5.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
|
221 |
+
[ 179/ 219] blk.5.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
222 |
+
[ 180/ 219] blk.5.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
223 |
+
[ 181/ 219] blk.5.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
224 |
+
[ 182/ 219] blk.5.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
|
225 |
+
+[ 183/ 219] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 184/ 219] blk.6.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
+[ 185/ 219] blk.6.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
+[ 186/ 219] blk.6.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
+[ 187/ 219] blk.6.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 188/ 219] blk.6.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
+[ 189/ 219] blk.6.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
+[ 190/ 219] blk.6.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
+[ 191/ 219] blk.6.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
+[ 192/ 219] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 193/ 219] blk.7.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
+[ 194/ 219] blk.7.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
+[ 195/ 219] blk.7.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
+[ 196/ 219] blk.7.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 197/ 219] blk.7.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
+[ 198/ 219] blk.7.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
+[ 199/ 219] blk.7.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
+[ 200/ 219] blk.7.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
+[ 201/ 219] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 202/ 219] blk.8.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
+[ 203/ 219] blk.8.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
+[ 204/ 219] blk.8.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
+[ 205/ 219] blk.8.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 206/ 219] blk.8.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
+[ 207/ 219] blk.8.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
+[ 208/ 219] blk.8.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
+[ 209/ 219] blk.8.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
+[ 210/ 219] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 211/ 219] blk.9.ffn_down.weight - [ 5440, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
+[ 212/ 219] blk.9.ffn_gate.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
+[ 213/ 219] blk.9.ffn_up.weight - [ 2048, 5440, 1, 1], type = bf16, converting to q8_0 .. size = 21.25 MiB -> 11.29 MiB
+[ 214/ 219] blk.9.ffn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+[ 215/ 219] blk.9.attn_k.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
+[ 216/ 219] blk.9.attn_output.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
+[ 217/ 219] blk.9.attn_q.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
+[ 218/ 219] blk.9.attn_v.weight - [ 2048, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
+[ 219/ 219] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB
+llama_model_quantize_internal: model size = 4298.38 MB
+llama_model_quantize_internal: quant size = 2752.45 MB
+
+main: quantize time = 3216.17 ms
+main: total time = 3216.17 ms
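
The log above is the tail of the Q8_0 conversion; logs of this shape come from llama.cpp's llama-quantize tool. A representative invocation (paths assumed from this repository's layout; the exact command line is not recorded in the log) would be:

    ./llama-quantize ./salamandra-2b_bf16.gguf ./salamandra-2b_Q8_0.gguf Q8_0

For the low-bit K- and I-quants in this release, the same tool would additionally be given --imatrix imatrix/oscar/imatrix.dat so that the importance matrix can guide the rounding.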
README.md
CHANGED
@@ -27,7 +27,7 @@ language:
 - mt
 - nl
 - nn
-- no
+- \no
 - oc
 - pl
 - pt
@@ -41,6 +41,59 @@ language:
 - uk
 ---
 
+
+# **Quantization Summary**
+
+- **IQ4_NL**: Best I quantization below **Q4** with minimal PPL impact.
+- **Q5_K_M**: Excellent balance above **Q4**, recommended for most applications.
+- **Q6_K**: Provides near-**bf16** performance with size savings.
+
+---
+
+# Quantization
+
+### **Perplexity Comparison Table:**
+
+| **Quantization Type** | **PPL** | **ln(PPL(Q)/PPL(bf16))** | **File Size** | **Notes**                                                      |
+|-----------------------|---------|--------------------------|---------------|----------------------------------------------------------------|
+| **IQ3_M**             | 15.1995 | 0.079131                 | 1.7G          | Good size efficiency with acceptable PPL increase              |
+| **Q3_K_L**            | 15.0444 | 0.068875                 | 1.8G          | Further size reduction with modest PPL increase                |
+| **IQ4_NL**            | 14.5534 | 0.035693                 | 1.9G          | Good size reduction with minimal PPL impact (**recommended**)  |
+| **Q4_K_M**            | 14.3990 | 0.025028                 | 2.0G          | Smaller with acceptable PPL                                    |
+| **Q5_K_M**            | 14.1299 | 0.006162                 | 2.2G          | Excellent balance of PPL and size (**recommended**)            |
+| **Q6_K**              | 14.0675 | 0.001736                 | 2.4G          | Nearly lossless performance with reduced size                  |
+| **bf16**              | 14.0431 | 0.0                      | 4.2G          | Baseline                                                       |
+
+---
+
+### **Notes:**
+
+- **Recommended Quantizations:**
+  - **IQ4_NL**: Represents the best of the I quantization types below **Q4**, achieving good size efficiency while maintaining low perplexity.
+  - **Q5_K_M**: Offers the best balance between low perplexity and reduced file size above **Q4**, making it ideal for most applications.
+  - **Q6_K**: Delivers nearly lossless performance compared to **bf16** at a reduced file size (2.4G vs. 4.2G). Ideal for scenarios requiring maximum accuracy with some size savings.
+- **Non-recommended Quantizations:**
+  - **IQ3_M**: Offers a smaller file size (1.7G) with an acceptable PPL increase (15.1995), making it a solid choice for highly compressed models.
+  - **Q3_K_L**: Provides a slightly larger file size (1.8G) with an even better PPL (15.0444), and fits the selection criteria for highly compressed models (log PPL diff < 0.3).
+  - **Q4_K_M**: While not designated as "recommended" here, **Q4_K_M** is highly suitable for backends such as **Metal**, which run **I-quant** models slowly; for those backends it remains an excellent choice.
+  - **Q6_K**: Similar to **Q8_0**, it offers perplexity very close to **bf16**. Given its smaller file size than Q8_0 (2.4G vs. 2.7G), Q6_K provides the better size-to-performance trade-off; it was selected because it is nearly lossless and under 2.5GB.
+
+---
+
+### **Defending the Selection:**
+
+The selection of recommended models is designed to provide a spectrum of options that meet the following criteria:
+
+- **Diversity in Quantization Types:**
+  - **I Quantization Below Q4:** **IQ4_NL** is included to offer an option that uses I quantization below the **Q4** level, balancing size and performance.
+  - **K Quantization At and Above Q4:** **Q4_K_M**, **Q5_K_M**, and **Q6_K** provide K quantization options at and above the **Q4** level, giving users choices based on their specific needs.
+  - **Highly Compressed Quantization (Q3 and below):** **IQ3_M** and **Q3_K_L** are included because they meet the selection criterion of log PPL diff < 0.3 and are not redundant with other models.
+- **Selection Criteria:**
+  - **Log PPL diff < 0.3:** All included models have a log PPL difference under 0.3, ensuring they maintain acceptable performance even when highly quantized.
+  - **No Multiple Models Within 100MB of the Same File Size:** Only one model is included per similar file-size range to avoid redundancy. For example, **Q3_K_L** (1.8G) is included while models like **IQ3_XS** (1.7G) are excluded due to overlapping file sizes and comparable PPL, ensuring a sparse yet comprehensive selection.
+
+
 ![](./images/salamandra_header.png)
 
 # Salamandra Model Card
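
The ln(PPL(Q)/PPL(bf16)) column and the two selection rules in the README diff above are straightforward to reproduce. A minimal Python sketch (PPL and file-size values copied from the table; thresholds as stated in the notes, with "within 100MB" read as a strict 0.1G gap) could look like:

    import math

    PPL_BF16 = 14.0431
    # (perplexity, file size in GiB) per quantization type, from the table above
    quants = {
        "IQ3_M":  (15.1995, 1.7),
        "Q3_K_L": (15.0444, 1.8),
        "IQ4_NL": (14.5534, 1.9),
        "Q4_K_M": (14.3990, 2.0),
        "Q5_K_M": (14.1299, 2.2),
        "Q6_K":   (14.0675, 2.4),
    }

    kept = []
    for name, (ppl, size) in sorted(quants.items(), key=lambda kv: kv[1][1]):
        log_diff = math.log(ppl / PPL_BF16)
        print(f"{name}: ln(PPL ratio) = {log_diff:.6f}")    # e.g. IQ4_NL -> ~0.0357
        if log_diff >= 0.3:
            continue                                        # rule 1: log PPL diff < 0.3
        if any(abs(size - s) < 0.1 for _, s in kept):
            continue                                        # rule 2: no two files within ~100MB
        kept.append((name, size))
    print("kept:", [n for n, _ in kept])

This is only a sketch of the stated criteria, not the exact procedure used; small differences in the last decimal places come from the table's PPL values being rounded.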
bf16_log.txt
ADDED
@@ -0,0 +1,245 @@
+/Users/Shared/Public/Github/llama.cpp/convert_hf_to_gguf.py --outtype bf16 . --outfile ./salamandra-2b_bf16.gguf
+INFO:hf-to-gguf:Loading model:
+INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only
+INFO:hf-to-gguf:Exporting model...
+INFO:hf-to-gguf:gguf: loading model part 'model.safetensors'
+INFO:hf-to-gguf:output.weight, torch.bfloat16 --> BF16, shape = {2048, 256000}
+INFO:hf-to-gguf:token_embd.weight, torch.bfloat16 --> BF16, shape = {2048, 256000}
+INFO:hf-to-gguf:blk.0.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.0.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
+INFO:hf-to-gguf:blk.0.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.0.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.0.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.0.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.0.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.0.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.0.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.1.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.1.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
+INFO:hf-to-gguf:blk.1.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.1.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.1.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.1.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.1.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.1.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.1.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.10.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.10.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
+INFO:hf-to-gguf:blk.10.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.10.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.10.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.10.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.10.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.10.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.10.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.11.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.11.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
+INFO:hf-to-gguf:blk.11.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.11.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.11.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.11.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.11.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.11.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.11.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.12.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.12.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
+INFO:hf-to-gguf:blk.12.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.12.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.12.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.12.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.12.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.12.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.12.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.13.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.13.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
+INFO:hf-to-gguf:blk.13.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.13.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.13.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.13.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.13.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.13.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.13.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.14.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.14.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
+INFO:hf-to-gguf:blk.14.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.14.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.14.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.14.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.14.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.14.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.14.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.15.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.15.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
+INFO:hf-to-gguf:blk.15.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.15.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.15.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.15.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.15.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.15.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.15.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.16.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.16.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
+INFO:hf-to-gguf:blk.16.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.16.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.16.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.16.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.16.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.16.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.16.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.17.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.17.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
+INFO:hf-to-gguf:blk.17.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.17.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.17.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.17.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.17.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.17.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.17.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.18.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.18.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
+INFO:hf-to-gguf:blk.18.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.18.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.18.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.18.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.18.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.18.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.18.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.19.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.19.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
+INFO:hf-to-gguf:blk.19.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.19.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.19.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.19.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.19.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.19.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.19.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.2.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.2.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
+INFO:hf-to-gguf:blk.2.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.2.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.2.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.2.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.2.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.2.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.2.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.20.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.20.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
+INFO:hf-to-gguf:blk.20.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.20.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.20.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.20.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.20.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.20.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.20.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.21.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.21.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
+INFO:hf-to-gguf:blk.21.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.21.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.21.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.21.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.21.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.21.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.21.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.22.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.22.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
+INFO:hf-to-gguf:blk.22.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.22.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.22.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.22.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.22.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.22.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.22.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.23.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.23.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
+INFO:hf-to-gguf:blk.23.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.23.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.23.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.23.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.23.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.23.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.23.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.3.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.3.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
+INFO:hf-to-gguf:blk.3.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.3.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.3.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.3.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.3.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.3.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.3.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.4.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.4.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
+INFO:hf-to-gguf:blk.4.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.4.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.4.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.4.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.4.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.4.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.4.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.5.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.5.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
+INFO:hf-to-gguf:blk.5.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.5.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.5.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.5.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.5.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.5.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.5.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.6.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.6.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
+INFO:hf-to-gguf:blk.6.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.6.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.6.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.6.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.6.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.6.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.6.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.7.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.7.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
+INFO:hf-to-gguf:blk.7.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.7.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.7.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.7.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.7.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.7.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.7.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.8.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.8.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
+INFO:hf-to-gguf:blk.8.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.8.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.8.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.8.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.8.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.8.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.8.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.9.attn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.9.ffn_down.weight, torch.bfloat16 --> BF16, shape = {5440, 2048}
+INFO:hf-to-gguf:blk.9.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.9.ffn_up.weight, torch.bfloat16 --> BF16, shape = {2048, 5440}
+INFO:hf-to-gguf:blk.9.ffn_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:blk.9.attn_k.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.9.attn_output.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.9.attn_q.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:blk.9.attn_v.weight, torch.bfloat16 --> BF16, shape = {2048, 2048}
+INFO:hf-to-gguf:output_norm.weight, torch.bfloat16 --> F32, shape = {2048}
+INFO:hf-to-gguf:Set meta model
+INFO:hf-to-gguf:Set model parameters
+INFO:hf-to-gguf:gguf: context length = 8192
+INFO:hf-to-gguf:gguf: embedding length = 2048
+INFO:hf-to-gguf:gguf: feed forward length = 5440
+INFO:hf-to-gguf:gguf: head count = 16
+INFO:hf-to-gguf:gguf: key-value head count = 16
+INFO:hf-to-gguf:gguf: rope theta = 10000.0
+INFO:hf-to-gguf:gguf: rms norm epsilon = 1e-05
+INFO:hf-to-gguf:gguf: file type = 32
+INFO:hf-to-gguf:Set model tokenizer
+INFO:gguf.vocab:Setting special token type bos to 1
+INFO:gguf.vocab:Setting special token type eos to 2
+INFO:gguf.vocab:Setting special token type unk to 0
+INFO:gguf.vocab:Setting add_bos_token to True
+INFO:gguf.vocab:Setting add_eos_token to False
+INFO:hf-to-gguf:Set model quantization version
+INFO:gguf.gguf_writer:Writing the following files:
+INFO:gguf.gguf_writer:salamandra-2b_bf16.gguf: n_tensors = 219, total_size = 4.5G
+Writing: 100%|████████████████████████████████████████████████████████████████████| 4.51G/4.51G [00:10<00:00, 419Mbyte/s]
+INFO:hf-to-gguf:Model successfully exported to salamandra-2b_bf16.gguf
git_snapshot.txt
ADDED
@@ -0,0 +1,3 @@
+upstream: https://huggingface.co/BSC-LT/salamandra-2b
+branch: origin/main
+hash: f1f8713d7c0114f1f60fc274428cd158039e7425
model.safetensors → imatrix/oscar/imatrix-dataset.txt
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:c7830c495e88be1484f0fdd9a4f0b405cb88c0482283b0cf478238aabfcf2840
+size 101131321
tokenizer.model → imatrix/oscar/imatrix.dat
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:194d585dd3bb27574dad0e5da861492ac47104929a182fcebbd88c41567a95e9
+size 1707457
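
Taken together, these files describe the calibration workflow: the per-language text samples under imatrix/oscar/langs/ (listed below) are presumably concatenated into imatrix-dataset.txt, which llama.cpp's llama-imatrix tool then runs against the bf16 model to produce imatrix.dat. A representative invocation (file names taken from this repository's layout; the exact commands are not recorded here) would be:

    cat imatrix/oscar/langs/*.txt > imatrix/oscar/imatrix-dataset.txt
    ./llama-imatrix -m ./salamandra-2b_bf16.gguf -f imatrix/oscar/imatrix-dataset.txt -o imatrix/oscar/imatrix.dat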
imatrix/oscar/langs/bg.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7db75e8d3ab29d086a9f95843710c8e26eb82492feaba8892d98f7cd42c958c5
+size 15369727
imatrix/oscar/langs/ca.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:620a3fb2dadf2ad55e32248e1dc5455f7f27470a1e25893f0470eaf53b9e4364
+size 2452735
imatrix/oscar/langs/cs.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4bac9a98f322e5773b8e3e6abcd06ce243ad9ce1edf992db0ab5d9eea6e17490
+size 6374140
imatrix/oscar/langs/cy.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41072c1798493fd14eaebd5ee9820d71ea803ee42b793f40248ab536ff5421c4
+size 1711900
imatrix/oscar/langs/da.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e312b03a2cdf388224719c8e8b4b34911db629e77a818a6b272151c0746308a
+size 2845693
imatrix/oscar/langs/de.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4c2008f2af94e4f62cca8dd03eb4e439b916b689b82545345369bec5c81a024
+size 2820404
imatrix/oscar/langs/el.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9fd2393f1b32910ca111e8a3d86a29b3c7ad5d2ca81ce756411c34f133759e1d
+size 22016087
imatrix/oscar/langs/en.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af4f2a2b491f7a7c5eede69a5869762d7f2b4ef9f5e8960f3e34c96754f3e4b0
+size 6204830
imatrix/oscar/langs/es.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:71fbce9f6c2c8875a37c98bd8eed4144b64c91596214ff35676a3893d2d810ad
+size 4007613
imatrix/oscar/langs/et.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e706f41af29936a9033192e73b11637bd0160c5d176beae051e9fbc2f8719d0
+size 2366889
imatrix/oscar/langs/eu.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce9ef1dc787847b03b0d6af4bee5a097b38f55a9a3c3e9fc85727c1b50a2c964
+size 793185
imatrix/oscar/langs/fi.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:652fc7fb857799b7e01382240f340324a116d1bd0d60f7ac78c2112b2b264581
+size 4324807
imatrix/oscar/langs/fr.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75ff98d097421bc9d6dad95c84904fb25241123e6a397a47759dd5dba1b54246
+size 3227190
imatrix/oscar/langs/ga.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21683a371855abef07d7f84b05a03617d60e969498eea2ba3b589f3997c2d6aa
+size 1912591
imatrix/oscar/langs/gl.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7025affdb2538372f012f4213048ebaca72aa14fbbb14983d91f821aa5a42ef6
+size 1375166
imatrix/oscar/langs/hr.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:838dce29fce8638d515bc64e3d33763a25c2f13a1ae5878e1412f23797345411
+size 469023
imatrix/oscar/langs/hu.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d856286560de976671741744705c7ef227be2dd648bcf320fe63a0a6ef1aeea1
+size 6021141
imatrix/oscar/langs/it.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:220bc92cd65fbd5b432a453debe102e1b0c947a492cec6d3ff5a5b87cf3c7eac
+size 3893483
imatrix/oscar/langs/lt.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db1dff9403e72501447e46fb959b92740ca94afe57ada684e010e8dae3875559
+size 3543428
imatrix/oscar/langs/lv.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0fa438a0385c7b8d5f6cdefdc7a025af13bd85916409c3cc0087d2589b91d2a
+size 2837848
imatrix/oscar/langs/mt.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:097c73ca100485b1b6f7cdfee2e34ef6cf85bdf350cbf91e0328017adbcdab73
+size 966065
imatrix/oscar/langs/nl.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:749ff8293fdf1552e4e1c309431f4305260676945882c721e4287c1e7608d6c7
+size 3201009
imatrix/oscar/langs/nn.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:996c7311ab41ee25bee21d9125e8bb7fefcc3352ca4edaf2429b8e5ff0f3ad42
+size 473060
imatrix/oscar/langs/no.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:018f84ce699d6ce8dcffc1f4ea9ce69ce29f60dff5c822039c1575f41e6f92fa
+size 2004094
imatrix/oscar/langs/oc.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4f77cb54290063b6089120728e15468db7b212bf617c4678014503c866ede5c
+size 672153
imatrix/oscar/langs/pl.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8d98cc00d727be32032107f9deae185aa2aac04d79a563b39069b50c288d09d
+size 3187625
imatrix/oscar/langs/pt.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2d66c7bb0058423bf5bb97809c907bffa439e2ca48eac4a1fd87a0c2475c25c
+size 3828558