legraphista commited on
Commit
25a8472
1 Parent(s): f6d96da

Upload imatrix.log with huggingface_hub

Browse files
Files changed (1) hide show
  1. imatrix.log +159 -0
imatrix.log ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ main: build = 2998 (9588f196)
2
+ main: built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
3
+ main: seed = 1716674713
4
+ llama_model_loader: loaded meta data with 28 key-value pairs and 322 tensors from aya-23-35B-IMat-GGUF/aya-23-35B.gguf (version GGUF V3 (latest))
5
+ llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
6
+ llama_model_loader: - kv 0: general.architecture str = command-r
7
+ llama_model_loader: - kv 1: general.name str = aya-23-35B
8
+ llama_model_loader: - kv 2: command-r.block_count u32 = 40
9
+ llama_model_loader: - kv 3: command-r.context_length u32 = 8192
10
+ llama_model_loader: - kv 4: command-r.embedding_length u32 = 8192
11
+ llama_model_loader: - kv 5: command-r.feed_forward_length u32 = 22528
12
+ llama_model_loader: - kv 6: command-r.attention.head_count u32 = 64
13
+ llama_model_loader: - kv 7: command-r.attention.head_count_kv u32 = 64
14
+ llama_model_loader: - kv 8: command-r.rope.freq_base f32 = 8000000.000000
15
+ llama_model_loader: - kv 9: command-r.attention.layer_norm_epsilon f32 = 0.000010
16
+ llama_model_loader: - kv 10: general.file_type u32 = 1
17
+ llama_model_loader: - kv 11: command-r.logit_scale f32 = 0.062500
18
+ llama_model_loader: - kv 12: command-r.rope.scaling.type str = none
19
+ llama_model_loader: - kv 13: tokenizer.ggml.model str = gpt2
20
+ llama_model_loader: - kv 14: tokenizer.ggml.pre str = command-r
21
+ llama_model_loader: - kv 15: tokenizer.ggml.tokens arr[str,256000] = ["<PAD>", "<UNK>", "<CLS>", "<SEP>", ...
22
+ llama_model_loader: - kv 16: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, ...
23
+ llama_model_loader: - kv 17: tokenizer.ggml.merges arr[str,253333] = ["Ġ Ġ", "Ġ t", "e r", "i n", "Ġ a...
24
+ llama_model_loader: - kv 18: tokenizer.ggml.bos_token_id u32 = 5
25
+ llama_model_loader: - kv 19: tokenizer.ggml.eos_token_id u32 = 255001
26
+ llama_model_loader: - kv 20: tokenizer.ggml.padding_token_id u32 = 0
27
+ llama_model_loader: - kv 21: tokenizer.ggml.add_bos_token bool = true
28
+ llama_model_loader: - kv 22: tokenizer.ggml.add_eos_token bool = false
29
+ llama_model_loader: - kv 23: tokenizer.chat_template.tool_use str = {{ bos_token }}{% if messages[0]['rol...
30
+ llama_model_loader: - kv 24: tokenizer.chat_template.rag str = {{ bos_token }}{% if messages[0]['rol...
31
+ llama_model_loader: - kv 25: tokenizer.chat_templates arr[str,2] = ["rag", "tool_use"]
32
+ llama_model_loader: - kv 26: tokenizer.chat_template str = {{ bos_token }}{% if messages[0]['rol...
33
+ llama_model_loader: - kv 27: general.quantization_version u32 = 2
34
+ llama_model_loader: - type f32: 41 tensors
35
+ llama_model_loader: - type f16: 281 tensors
36
+ llm_load_vocab: special tokens definition check successful ( 1008/256000 ).
37
+ llm_load_print_meta: format = GGUF V3 (latest)
38
+ llm_load_print_meta: arch = command-r
39
+ llm_load_print_meta: vocab type = BPE
40
+ llm_load_print_meta: n_vocab = 256000
41
+ llm_load_print_meta: n_merges = 253333
42
+ llm_load_print_meta: n_ctx_train = 8192
43
+ llm_load_print_meta: n_embd = 8192
44
+ llm_load_print_meta: n_head = 64
45
+ llm_load_print_meta: n_head_kv = 64
46
+ llm_load_print_meta: n_layer = 40
47
+ llm_load_print_meta: n_rot = 128
48
+ llm_load_print_meta: n_embd_head_k = 128
49
+ llm_load_print_meta: n_embd_head_v = 128
50
+ llm_load_print_meta: n_gqa = 1
51
+ llm_load_print_meta: n_embd_k_gqa = 8192
52
+ llm_load_print_meta: n_embd_v_gqa = 8192
53
+ llm_load_print_meta: f_norm_eps = 1.0e-05
54
+ llm_load_print_meta: f_norm_rms_eps = 0.0e+00
55
+ llm_load_print_meta: f_clamp_kqv = 0.0e+00
56
+ llm_load_print_meta: f_max_alibi_bias = 0.0e+00
57
+ llm_load_print_meta: f_logit_scale = 6.2e-02
58
+ llm_load_print_meta: n_ff = 22528
59
+ llm_load_print_meta: n_expert = 0
60
+ llm_load_print_meta: n_expert_used = 0
61
+ llm_load_print_meta: causal attn = 1
62
+ llm_load_print_meta: pooling type = 0
63
+ llm_load_print_meta: rope type = 0
64
+ llm_load_print_meta: rope scaling = none
65
+ llm_load_print_meta: freq_base_train = 8000000.0
66
+ llm_load_print_meta: freq_scale_train = 1
67
+ llm_load_print_meta: n_yarn_orig_ctx = 8192
68
+ llm_load_print_meta: rope_finetuned = unknown
69
+ llm_load_print_meta: ssm_d_conv = 0
70
+ llm_load_print_meta: ssm_d_inner = 0
71
+ llm_load_print_meta: ssm_d_state = 0
72
+ llm_load_print_meta: ssm_dt_rank = 0
73
+ llm_load_print_meta: model type = 35B
74
+ llm_load_print_meta: model ftype = F16
75
+ llm_load_print_meta: model params = 34.98 B
76
+ llm_load_print_meta: model size = 65.16 GiB (16.00 BPW)
77
+ llm_load_print_meta: general.name = aya-23-35B
78
+ llm_load_print_meta: BOS token = 5 '<BOS_TOKEN>'
79
+ llm_load_print_meta: EOS token = 255001 '<|END_OF_TURN_TOKEN|>'
80
+ llm_load_print_meta: PAD token = 0 '<PAD>'
81
+ llm_load_print_meta: LF token = 136 'Ä'
82
+ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
83
+ ggml_cuda_init: CUDA_USE_TENSOR_CORES: yes
84
+ ggml_cuda_init: found 1 CUDA devices:
85
+ Device 0: NVIDIA GeForce RTX 4090, compute capability 8.9, VMM: yes
86
+ llm_load_tensors: ggml ctx size = 0.34 MiB
87
+ llm_load_tensors: offloading 10 repeating layers to GPU
88
+ llm_load_tensors: offloaded 10/41 layers to GPU
89
+ llm_load_tensors: CPU buffer size = 66721.28 MiB
90
+ llm_load_tensors: CUDA0 buffer size = 15680.31 MiB
91
+ ...........................................................................................
92
+ llama_new_context_with_model: n_ctx = 512
93
+ llama_new_context_with_model: n_batch = 512
94
+ llama_new_context_with_model: n_ubatch = 512
95
+ llama_new_context_with_model: flash_attn = 0
96
+ llama_new_context_with_model: freq_base = 8000000.0
97
+ llama_new_context_with_model: freq_scale = 1
98
+ llama_kv_cache_init: CUDA_Host KV buffer size = 480.00 MiB
99
+ llama_kv_cache_init: CUDA0 KV buffer size = 160.00 MiB
100
+ llama_new_context_with_model: KV self size = 640.00 MiB, K (f16): 320.00 MiB, V (f16): 320.00 MiB
101
+ llama_new_context_with_model: CUDA_Host output buffer size = 0.98 MiB
102
+ llama_new_context_with_model: CUDA0 compute buffer size = 4516.00 MiB
103
+ llama_new_context_with_model: CUDA_Host compute buffer size = 33.01 MiB
104
+ llama_new_context_with_model: graph nodes = 1208
105
+ llama_new_context_with_model: graph splits = 304
106
+
107
+ system_info: n_threads = 25 / 32 | AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | FMA = 1 | NEON = 0 | SVE = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | MATMUL_INT8 = 0 | LLAMAFILE = 1 |
108
+ compute_imatrix: tokenizing the input ..
109
+ compute_imatrix: tokenization took 195.016 ms
110
+ compute_imatrix: computing over 194 chunks with batch_size 512
111
+ compute_imatrix: 3.91 seconds per pass - ETA 12.63 minutes
112
+ [1]5.7654,[2]4.0556,[3]3.7932,[4]4.1756,[5]4.1190,[6]3.8734,[7]4.6046,[8]4.8725,[9]5.4760,
113
+ save_imatrix: stored collected data after 10 chunks in aya-23-35B-IMat-GGUF/imatrix.dat
114
+ [10]5.7366,[11]5.8940,[12]6.0375,[13]6.4213,[14]6.6150,[15]6.9195,[16]7.0883,[17]7.2951,[18]7.5592,[19]7.6438,
115
+ save_imatrix: stored collected data after 20 chunks in aya-23-35B-IMat-GGUF/imatrix.dat
116
+ [20]7.2758,[21]7.0995,[22]6.9637,[23]6.6133,[24]6.3833,[25]6.3229,[26]6.4615,[27]6.3775,[28]6.5183,[29]6.3846,
117
+ save_imatrix: stored collected data after 30 chunks in aya-23-35B-IMat-GGUF/imatrix.dat
118
+ [30]6.3824,[31]6.1050,[32]5.9291,[33]5.8513,[34]5.8304,[35]5.8026,[36]5.8335,[37]5.9018,[38]5.9573,[39]6.0502,
119
+ save_imatrix: stored collected data after 40 chunks in aya-23-35B-IMat-GGUF/imatrix.dat
120
+ [40]6.1367,[41]6.2154,[42]6.4067,[43]6.6069,[44]6.8114,[45]6.9215,[46]6.8986,[47]6.8746,[48]6.8201,[49]6.9057,
121
+ save_imatrix: stored collected data after 50 chunks in aya-23-35B-IMat-GGUF/imatrix.dat
122
+ [50]6.9739,[51]7.0457,[52]7.1585,[53]7.2114,[54]7.2555,[55]7.3090,[56]7.3144,[57]7.3257,[58]7.3349,[59]7.3297,
123
+ save_imatrix: stored collected data after 60 chunks in aya-23-35B-IMat-GGUF/imatrix.dat
124
+ [60]7.4142,[61]7.4819,[62]7.5189,[63]7.5494,[64]7.4811,[65]7.4235,[66]7.3807,[67]7.3641,[68]7.3352,[69]7.2930,
125
+ save_imatrix: stored collected data after 70 chunks in aya-23-35B-IMat-GGUF/imatrix.dat
126
+ [70]7.2121,[71]7.2013,[72]7.1772,[73]7.1835,[74]7.2057,[75]7.2189,[76]7.2290,[77]7.2053,[78]7.1416,[79]7.0566,
127
+ save_imatrix: stored collected data after 80 chunks in aya-23-35B-IMat-GGUF/imatrix.dat
128
+ [80]7.0084,[81]6.9395,[82]6.8934,[83]6.8255,[84]6.7942,[85]6.7825,[86]6.7648,[87]6.7581,[88]6.7758,[89]6.7894,
129
+ save_imatrix: stored collected data after 90 chunks in aya-23-35B-IMat-GGUF/imatrix.dat
130
+ [90]6.8124,[91]6.7853,[92]6.7439,[93]6.7325,[94]6.7536,[95]6.7447,[96]6.7446,[97]6.7477,[98]6.7737,[99]6.7457,
131
+ save_imatrix: stored collected data after 100 chunks in aya-23-35B-IMat-GGUF/imatrix.dat
132
+ [100]6.7739,[101]6.7715,[102]6.7508,[103]6.7648,[104]6.7474,[105]6.7189,[106]6.6777,[107]6.7072,[108]6.7467,[109]6.7360,
133
+ save_imatrix: stored collected data after 110 chunks in aya-23-35B-IMat-GGUF/imatrix.dat
134
+ [110]6.7270,[111]6.7261,[112]6.7714,[113]6.7166,[114]6.7001,[115]6.6775,[116]6.6332,[117]6.6115,[118]6.5818,[119]6.5468,
135
+ save_imatrix: stored collected data after 120 chunks in aya-23-35B-IMat-GGUF/imatrix.dat
136
+ [120]6.5167,[121]6.4764,[122]6.4525,[123]6.4167,[124]6.3857,[125]6.3666,[126]6.3854,[127]6.4183,[128]6.4480,[129]6.4659,
137
+ save_imatrix: stored collected data after 130 chunks in aya-23-35B-IMat-GGUF/imatrix.dat
138
+ [130]6.4972,[131]6.5829,[132]6.6624,[133]6.7438,[134]6.8327,[135]6.8791,[136]6.9207,[137]6.9391,[138]6.9684,[139]6.9847,
139
+ save_imatrix: stored collected data after 140 chunks in aya-23-35B-IMat-GGUF/imatrix.dat
140
+ [140]7.0060,[141]7.0407,[142]7.0672,[143]7.0978,[144]7.1228,[145]7.1438,[146]7.1297,[147]7.1694,[148]7.1815,[149]7.2047,
141
+ save_imatrix: stored collected data after 150 chunks in aya-23-35B-IMat-GGUF/imatrix.dat
142
+ [150]7.1886,[151]7.2105,[152]7.2040,[153]7.1868,[154]7.1736,[155]7.1720,[156]7.1733,[157]7.1775,[158]7.1735,[159]7.1426,
143
+ save_imatrix: stored collected data after 160 chunks in aya-23-35B-IMat-GGUF/imatrix.dat
144
+ [160]7.1877,[161]7.2294,[162]7.2675,[163]7.3431,[164]7.3812,[165]7.3903,[166]7.3919,[167]7.4201,[168]7.4045,[169]7.4347,
145
+ save_imatrix: stored collected data after 170 chunks in aya-23-35B-IMat-GGUF/imatrix.dat
146
+ [170]7.4195,[171]7.4098,[172]7.4208,[173]7.4430,[174]7.4460,[175]7.4539,[176]7.4689,[177]7.4688,[178]7.4555,[179]7.4409,
147
+ save_imatrix: stored collected data after 180 chunks in aya-23-35B-IMat-GGUF/imatrix.dat
148
+ [180]7.4359,[181]7.4262,[182]7.4254,[183]7.4147,[184]7.4072,[185]7.3785,[186]7.3827,[187]7.3697,[188]7.3819,[189]7.3945,
149
+ save_imatrix: stored collected data after 190 chunks in aya-23-35B-IMat-GGUF/imatrix.dat
150
+ [190]7.4083,[191]7.4241,[192]7.4100,[193]7.3691,[194]7.3317,
151
+ save_imatrix: stored collected data after 194 chunks in aya-23-35B-IMat-GGUF/imatrix.dat
152
+
153
+ llama_print_timings: load time = 7127.77 ms
154
+ llama_print_timings: sample time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
155
+ llama_print_timings: prompt eval time = 737457.76 ms / 99328 tokens ( 7.42 ms per token, 134.69 tokens per second)
156
+ llama_print_timings: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
157
+ llama_print_timings: total time = 744301.62 ms / 99329 tokens
158
+
159
+ Final estimate: PPL = 7.3317 +/- 0.08422