Update models
Browse files- .gitattributes +20 -0
- config.json +42 -0
- r1-1776-distill-llama-70b-Q2_K.gguf +3 -0
- r1-1776-distill-llama-70b-Q3_K_L.gguf +3 -0
- r1-1776-distill-llama-70b-Q3_K_M.gguf +3 -0
- r1-1776-distill-llama-70b-Q3_K_S.gguf +3 -0
- r1-1776-distill-llama-70b-Q4_0.gguf +3 -0
- r1-1776-distill-llama-70b-Q4_K_M.gguf +3 -0
- r1-1776-distill-llama-70b-Q4_K_S.gguf +3 -0
- r1-1776-distill-llama-70b-Q5_0.gguf +3 -0
- r1-1776-distill-llama-70b-Q5_K_M.gguf +3 -0
- r1-1776-distill-llama-70b-Q5_K_S.gguf +3 -0
- r1-1776-distill-llama-70b-Q6_K-00001-of-00002.gguf +3 -0
- r1-1776-distill-llama-70b-Q6_K-00002-of-00002.gguf +3 -0
- r1-1776-distill-llama-70b-Q8_0-00001-of-00003.gguf +3 -0
- r1-1776-distill-llama-70b-Q8_0-00002-of-00003.gguf +3 -0
- r1-1776-distill-llama-70b-Q8_0-00003-of-00003.gguf +3 -0
- r1-1776-distill-llama-70b-f16-00001-of-00005.gguf +3 -0
- r1-1776-distill-llama-70b-f16-00002-of-00005.gguf +3 -0
- r1-1776-distill-llama-70b-f16-00003-of-00005.gguf +3 -0
- r1-1776-distill-llama-70b-f16-00004-of-00005.gguf +3 -0
- r1-1776-distill-llama-70b-f16-00005-of-00005.gguf +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,23 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
r1-1776-distill-llama-70b-Q2_K.gguf filter=lfs diff=lfs merge=lfs -text
|
37 |
+
r1-1776-distill-llama-70b-Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text
|
38 |
+
r1-1776-distill-llama-70b-Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
39 |
+
r1-1776-distill-llama-70b-Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text
|
40 |
+
r1-1776-distill-llama-70b-Q4_0.gguf filter=lfs diff=lfs merge=lfs -text
|
41 |
+
r1-1776-distill-llama-70b-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
42 |
+
r1-1776-distill-llama-70b-Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
|
43 |
+
r1-1776-distill-llama-70b-Q5_0.gguf filter=lfs diff=lfs merge=lfs -text
|
44 |
+
r1-1776-distill-llama-70b-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
45 |
+
r1-1776-distill-llama-70b-Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text
|
46 |
+
r1-1776-distill-llama-70b-Q6_K-00001-of-00002.gguf filter=lfs diff=lfs merge=lfs -text
|
47 |
+
r1-1776-distill-llama-70b-Q6_K-00002-of-00002.gguf filter=lfs diff=lfs merge=lfs -text
|
48 |
+
r1-1776-distill-llama-70b-Q8_0-00001-of-00003.gguf filter=lfs diff=lfs merge=lfs -text
|
49 |
+
r1-1776-distill-llama-70b-Q8_0-00002-of-00003.gguf filter=lfs diff=lfs merge=lfs -text
|
50 |
+
r1-1776-distill-llama-70b-Q8_0-00003-of-00003.gguf filter=lfs diff=lfs merge=lfs -text
|
51 |
+
r1-1776-distill-llama-70b-f16-00001-of-00005.gguf filter=lfs diff=lfs merge=lfs -text
|
52 |
+
r1-1776-distill-llama-70b-f16-00002-of-00005.gguf filter=lfs diff=lfs merge=lfs -text
|
53 |
+
r1-1776-distill-llama-70b-f16-00003-of-00005.gguf filter=lfs diff=lfs merge=lfs -text
|
54 |
+
r1-1776-distill-llama-70b-f16-00004-of-00005.gguf filter=lfs diff=lfs merge=lfs -text
|
55 |
+
r1-1776-distill-llama-70b-f16-00005-of-00005.gguf filter=lfs diff=lfs merge=lfs -text
|
config.json
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
|
3 |
+
"architectures": [
|
4 |
+
"LlamaForCausalLM"
|
5 |
+
],
|
6 |
+
"attention_bias": false,
|
7 |
+
"attention_dropout": 0.0,
|
8 |
+
"bos_token_id": 128000,
|
9 |
+
"eos_token_id": [
|
10 |
+
128001,
|
11 |
+
128008,
|
12 |
+
128009
|
13 |
+
],
|
14 |
+
"head_dim": 128,
|
15 |
+
"hidden_act": "silu",
|
16 |
+
"hidden_size": 8192,
|
17 |
+
"initializer_range": 0.02,
|
18 |
+
"intermediate_size": 28672,
|
19 |
+
"max_position_embeddings": 131072,
|
20 |
+
"mlp_bias": false,
|
21 |
+
"model_type": "llama",
|
22 |
+
"num_attention_heads": 64,
|
23 |
+
"num_hidden_layers": 80,
|
24 |
+
"num_key_value_heads": 8,
|
25 |
+
"pad_token_id": 128004,
|
26 |
+
"pretraining_tp": 1,
|
27 |
+
"rms_norm_eps": 1e-05,
|
28 |
+
"rope_scaling": {
|
29 |
+
"factor": 8.0,
|
30 |
+
"high_freq_factor": 4.0,
|
31 |
+
"low_freq_factor": 1.0,
|
32 |
+
"original_max_position_embeddings": 8192,
|
33 |
+
"rope_type": "llama3"
|
34 |
+
},
|
35 |
+
"rope_theta": 500000.0,
|
36 |
+
"tie_word_embeddings": false,
|
37 |
+
"torch_dtype": "bfloat16",
|
38 |
+
"transformers_version": "4.48.0",
|
39 |
+
"unsloth_fixed": true,
|
40 |
+
"use_cache": true,
|
41 |
+
"vocab_size": 128256
|
42 |
+
}
|
r1-1776-distill-llama-70b-Q2_K.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45c54c536d1b09ab03436b7281539bd1a5fe5366770c7e0478ccebf7b191964b
|
3 |
+
size 26375110944
|
r1-1776-distill-llama-70b-Q3_K_L.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b53b92832559845ef076ef997edcbf3b33722170ccce0aee734fac52353a9718
|
3 |
+
size 37140594976
|
r1-1776-distill-llama-70b-Q3_K_M.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41d51bc624ba257ca118777c9124479d4d3c4012b41f2e7383caa6f9add8196b
|
3 |
+
size 34267496736
|
r1-1776-distill-llama-70b-Q3_K_S.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:051476c6edf9a8f6739b6d965f3e5fc99c089365ad009953467e9c9a344a4358
|
3 |
+
size 30912053536
|
r1-1776-distill-llama-70b-Q4_0.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e3c8d5bdcc5a820090ad61eeacff94740ed569933b0fdcb2a9baae85df5ea52b
|
3 |
+
size 39969734944
|
r1-1776-distill-llama-70b-Q4_K_M.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a6025d558c266fd1bb62e359579fa85338a019e0c59f2eb65f3ce7f820c566b1
|
3 |
+
size 42520396064
|
r1-1776-distill-llama-70b-Q4_K_S.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b8761bc98e78f1d3fd58c6effd7a33ffc02799432020459d1f869542a3519e7
|
3 |
+
size 40347222304
|
r1-1776-distill-llama-70b-Q5_0.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:180aed5f7ae8eedf1a16c58ddc5e2dcaf8356414209e46bb0daca5e26f87b85e
|
3 |
+
size 48657449248
|
r1-1776-distill-llama-70b-Q5_K_M.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89c060b48abddca84026446a3974dd4c9249e208d9bd4e262977d63239b26b41
|
3 |
+
size 49949819168
|
r1-1776-distill-llama-70b-Q5_K_S.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0c3e19d7e8160a837134c81050762a08b71fa3e80e793077d35f751e6165b459
|
3 |
+
size 48657449248
|
r1-1776-distill-llama-70b-Q6_K-00001-of-00002.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2dba041ea134d5448be4fdcb1d836b26ed8e350b63e8196ca8efac243f4b870e
|
3 |
+
size 29933776512
|
r1-1776-distill-llama-70b-Q6_K-00002-of-00002.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64ff1ed1a4ff2a814710f1a8730ed1f94104c83d7b9a5c0dd786f596da105c09
|
3 |
+
size 27954369376
|
r1-1776-distill-llama-70b-Q8_0-00001-of-00003.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b845789f6f529acf52c525bb66bc2e1984bdd918d68e06d60127e84bc9ba39e4
|
3 |
+
size 29926005184
|
r1-1776-distill-llama-70b-Q8_0-00002-of-00003.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:47ff366fc8972462ea4627868cf67d4e78f7a96644340bc58fdac91edb61dedd
|
3 |
+
size 29753427360
|
r1-1776-distill-llama-70b-Q8_0-00003-of-00003.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c90d9652b8aff00ce75819e82036f1e69fa6659dbf7cc73e10f85e42cfbe6c71
|
3 |
+
size 15295619840
|
r1-1776-distill-llama-70b-f16-00001-of-00005.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d87205f11644924d47873d8684fb18d3911e10ceea5830c9204cd8d437d9f5a
|
3 |
+
size 29960436480
|
r1-1776-distill-llama-70b-f16-00002-of-00005.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2bb6be1fe42b99bb7dad7e46005ba88c2957bdffd853af7cea7acdc90bc95177
|
3 |
+
size 29562578048
|
r1-1776-distill-llama-70b-f16-00003-of-00005.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7743bc64869cb4b7c2dab9869a0b507ea4a0d9eff242fd838f7cfe040e34b1d1
|
3 |
+
size 29864633824
|
r1-1776-distill-llama-70b-f16-00004-of-00005.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:efa2362198c31f31ecb171e909010b7ec1a0f7a824d25f891a6ecc1e5bb96244
|
3 |
+
size 29562545216
|
r1-1776-distill-llama-70b-f16-00005-of-00005.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a6d39817b8145c3391b9ee6106b05caa681942ca2612e87bed54351d05563177
|
3 |
+
size 22167722464
|