update model to step 124484

Changed files:
- .ipynb_checkpoints/README-checkpoint.md (+25 -28)
- .ipynb_checkpoints/config-checkpoint.json (+26 -0)
- README.md (+24 -24)
- pytorch_model.bin (+1 -1)
.ipynb_checkpoints/README-checkpoint.md
CHANGED

@@ -2,6 +2,7 @@
 language:
 - it
 pipeline_tag: text-generation
+max_length: 100
 widget:
 - text: Alessandro è un ragazzo che progetta Infissi
 - text: Melissa è una ragazza che adora
@@ -10,31 +11,27 @@ tags:
 - italiano
 - llama
 ---
-This is a train starting from an empty model based exclusively on Italian language datasets (currently redpajama 2023-14 it)
-
-the train is ongoing and will extend to new datasets
-
-More precise versions will be published shortly
-
-Train on my server, i have studied and adapted the model starting from the repository https://github.com/karpathy/llama2.c
-
-# max_seq_len: 7b = 2048
-
-
-#
-
-
-
-
-
-
-n_heads = 32
-
-
-
-
-
-multiple_of = 32
-
-num decayed parameter tensors: 225, with 251,068,416 parameters
-num non-decayed parameter tensors: 65, with 49,920 parameters
+This is a train starting from an empty model based exclusively on Italian language datasets (currently redpajama 2023-14 it)<br/>
+<br/>
+the train is ongoing and will extend to new datasets.<br/>
+<br/>
+More precise versions will be published shortly.<br/>
+<br/>
+Train on my server, i have studied and adapted the model starting from the repository https://github.com/karpathy/llama2.c<br/>
+<br/>
+# max_seq_len: (7b = 2048) The maximum sequence length for input data.<br/>
+# dim (7b= 4096) Represents the dimensionality of the model<br/>
+# n_layers: (7b = 32) The number of layers in the model<br/>
+# n_heads: (7b = 32) Determines the number of attention heads in the model<br/>
+# n_kv_heads: (7b = 32) The number of key and value heads<br/>
+# multiple_of: (7b = 256) A value used to make the SwiGLU hidden layer size a multiple of a large power of 2<br/>
+<br/>
+max_seq_len = 1024<br/>
+dim = 768<br/>
+n_layers = 32<br/>
+n_heads = 32<br/>
+n_kv_heads = 32<br/>
+multiple_of = 32<br/>
+<br/>
+num decayed parameter tensors: 225, with 251,068,416 parameters<br/>
+num non-decayed parameter tensors: 65, with 49,920 parameters<br/>
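For reference, here is how the hyperparameters listed in the card map onto a llama2.c-style config. This is a minimal sketch, not the author's training script: the dataclass mirrors the shape of `ModelArgs` in karpathy/llama2.c, and the hidden-size computation is the standard Llama SwiGLU sizing.

```python
from dataclasses import dataclass

@dataclass
class ModelArgs:
    # Values for this checkpoint; Llama-2-7B reference values in comments.
    dim: int = 768           # 7B: 4096
    n_layers: int = 32       # 7B: 32
    n_heads: int = 32        # 7B: 32
    n_kv_heads: int = 32     # 7B: 32
    multiple_of: int = 32    # 7B: 256; rounds the SwiGLU hidden size
    max_seq_len: int = 1024  # 7B: 2048
    vocab_size: int = 32000

args = ModelArgs()
assert args.dim % args.n_heads == 0  # per-head dim = 768 / 32 = 24

# Standard Llama FFN sizing: 2/3 of 4*dim, rounded up to a multiple of multiple_of.
hidden = int(2 * 4 * args.dim / 3)
hidden = args.multiple_of * ((hidden + args.multiple_of - 1) // args.multiple_of)
print(hidden)  # 2048, matching "intermediate_size" in the config.json added below
```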
.ipynb_checkpoints/config-checkpoint.json
ADDED

@@ -0,0 +1,26 @@
+{
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 2048,
+  "max_position_embeddings": 1024,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 32,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": true,
+  "transformers_version": "4.37.1",
+  "use_cache": true,
+  "vocab_size": 32000
+}
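The config declares `LlamaForCausalLM` with tied embeddings, so the checkpoint loads through the standard transformers API. A sketch using one of the card's widget prompts; `repo_id` is a placeholder, since the actual Hub repository id is not shown in this commit view:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "user/model"  # placeholder: substitute the real Hub repo id

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id)

# One of the widget prompts from the model card.
inputs = tokenizer("Melissa è una ragazza che adora", return_tensors="pt")
# max_length=100 mirrors the "max_length: 100" added to the card metadata.
outputs = model.generate(**inputs, max_length=100, do_sample=True)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```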
README.md
CHANGED

@@ -11,27 +11,27 @@ tags:
 - italiano
 - llama
 ---
-This is a train starting from an empty model based exclusively on Italian language datasets (currently redpajama 2023-14 it)
-
-the train is ongoing and will extend to new datasets
-
-More precise versions will be published shortly
-
-Train on my server, i have studied and adapted the model starting from the repository https://github.com/karpathy/llama2.c
-
-# max_seq_len: 7b = 2048
-# dim 7b= 4096
-# n_layers: 7b = 32
-# n_heads: 7b = 32
-# n_kv_heads: 7b = 32
-# multiple_of: 7b = 256
-
-max_seq_len = 1024
-dim = 768
-n_layers = 32
-n_heads = 32
-n_kv_heads = 32
-multiple_of = 32
-
-num decayed parameter tensors: 225, with 251,068,416 parameters
-num non-decayed parameter tensors: 65, with 49,920 parameters
+This is a train starting from an empty model based exclusively on Italian language datasets (currently redpajama 2023-14 it)<br/>
+<br/>
+the train is ongoing and will extend to new datasets.<br/>
+<br/>
+More precise versions will be published shortly.<br/>
+<br/>
+Train on my server, i have studied and adapted the model starting from the repository https://github.com/karpathy/llama2.c<br/>
+<br/>
+# max_seq_len: (7b = 2048) The maximum sequence length for input data.<br/>
+# dim (7b= 4096) Represents the dimensionality of the model<br/>
+# n_layers: (7b = 32) The number of layers in the model<br/>
+# n_heads: (7b = 32) Determines the number of attention heads in the model<br/>
+# n_kv_heads: (7b = 32) The number of key and value heads<br/>
+# multiple_of: (7b = 256) A value used to make the SwiGLU hidden layer size a multiple of a large power of 2<br/>
+<br/>
+max_seq_len = 1024<br/>
+dim = 768<br/>
+n_layers = 32<br/>
+n_heads = 32<br/>
+n_kv_heads = 32<br/>
+multiple_of = 32<br/>
+<br/>
+num decayed parameter tensors: 225, with 251,068,416 parameters<br/>
+num non-decayed parameter tensors: 65, with 49,920 parameters<br/>
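The two parameter-tensor lines at the end of the card are the optimizer-setup log that llama2.c inherits from nanoGPT: tensors with two or more dimensions (matmul weights, embeddings) receive weight decay, while 1D tensors (RMSNorm gains) do not. A sketch of that split, assuming the upstream convention:

```python
import torch

def param_groups(model: torch.nn.Module, weight_decay: float):
    # Split parameters the way llama2.c / nanoGPT do before building AdamW:
    # tensors with >= 2 dims are decayed, 1D tensors are not.
    params = [p for p in model.parameters() if p.requires_grad]
    decay = [p for p in params if p.dim() >= 2]
    no_decay = [p for p in params if p.dim() < 2]
    print(f"num decayed parameter tensors: {len(decay)}, "
          f"with {sum(p.numel() for p in decay):,} parameters")
    print(f"num non-decayed parameter tensors: {len(no_decay)}, "
          f"with {sum(p.numel() for p in no_decay):,} parameters")
    return [{"params": decay, "weight_decay": weight_decay},
            {"params": no_decay, "weight_decay": 0.0}]
```

The non-decayed count is consistent with the shapes above: two RMSNorm vectors per layer plus the final norm give 2 × 32 + 1 = 65 tensors of size 768, i.e. 65 × 768 = 49,920 parameters.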
pytorch_model.bin
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ab62b69b46b7f795f22d07447f33fa985864f7fdd281df9a3d26834a1750744f
 size 1004567442
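The pytorch_model.bin change is a plain Git LFS pointer update: the byte size is unchanged and only the sha256 oid moves to the step-124484 weights. A small sketch for checking a downloaded copy against the new pointer:

```python
import hashlib
import os

EXPECTED_SHA256 = "ab62b69b46b7f795f22d07447f33fa985864f7fdd281df9a3d26834a1750744f"
EXPECTED_SIZE = 1004567442  # bytes, from the LFS pointer

def matches_pointer(path: str) -> bool:
    # Cheap size check first, then hash the file in 1 MiB chunks.
    if os.path.getsize(path) != EXPECTED_SIZE:
        return False
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == EXPECTED_SHA256

print(matches_pointer("pytorch_model.bin"))
```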