Update README.md
README.md (changed):

```diff
@@ -2,6 +2,7 @@
 language:
 - it
 pipeline_tag: text-generation
+max_length: 100
 widget:
 - text: Alessandro è un ragazzo che progetta Infissi
 - text: Melissa è una ragazza che adora
@@ -19,21 +20,17 @@ More precise versions will be published shortly.
 Train on my server, i have studied and adapted the model starting from the repository https://github.com/karpathy/llama2.c
 
 # max_seq_len: 7b = 2048: It represents the maximum sequence length for input data.
-max_seq_len = 1024 #7b=2048
-
 # dim 7b= 4096: This attribute represents the dimensionality of the model
-dim = 768
-
 # n_layers: 7b = 32: It specifies the number of layers in the model
-n_layers = 32
-
 # n_heads: 7b = 32: This attribute determines the number of attention heads in the model
-n_heads = 32
-
 # n_kv_heads: 7b = 32: It represents the number of key and value heads,
-n_kv_heads = 32
-
 # multiple_of: 7b = 256: It specifies a value used to make the SwiGLU hidden layer size a multiple of a large power of 2
+
+max_seq_len = 1024
+dim = 768
+n_layers = 32
+n_heads = 32
+n_kv_heads = 32
 multiple_of = 32
 
 num decayed parameter tensors: 225, with 251,068,416 parameters
```
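The hyperparameters quoted in the diff pin the architecture down, so the figure "num decayed parameter tensors: 225, with 251,068,416 parameters" can be checked by arithmetic. Below is a minimal sketch following llama2.c's sizing conventions (SwiGLU hidden size rounded up to `multiple_of`, a weight-tied output head, and weight decay applied only to 2-D weight matrices); `vocab_size = 32000` is an assumption (the Llama 2 tokenizer size), as the diff does not state it.

```python
# Sanity check for the quoted "225 tensors, 251,068,416 parameters"
# using the hyperparameters from the README diff above.
# Assumption: vocab_size = 32000 (Llama 2 tokenizer), tied output head.

dim = 768
n_layers = 32
n_heads = 32
n_kv_heads = 32
multiple_of = 32
vocab_size = 32000  # assumed, not stated in the README

# SwiGLU hidden size, computed the way llama2.c's FeedForward does:
# 4*dim, scaled by 2/3, then rounded up to a multiple of `multiple_of`.
hidden_dim = int(2 * (4 * dim) / 3)
hidden_dim = multiple_of * ((hidden_dim + multiple_of - 1) // multiple_of)

head_dim = dim // n_heads
attn = (dim * n_heads * head_dim            # wq
        + 2 * dim * n_kv_heads * head_dim   # wk, wv
        + n_heads * head_dim * dim)         # wo
ffn = 3 * dim * hidden_dim                  # w1, w2, w3

# Only 2-D weight matrices get weight decay; RMSNorm vectors do not.
decayed_params = n_layers * (attn + ffn) + vocab_size * dim
decayed_tensors = n_layers * 7 + 1          # 7 matrices/layer + tied embedding

print(decayed_tensors, f"{decayed_params:,}")  # 225 251,068,416
```

The exact match suggests the vocab-size assumption is right: 32 layers contribute 226,492,416 attention/FFN parameters, and the 32000 x 768 embedding (shared with the output head) adds the remaining 24,576,000.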