Text Generation
Transformers
PyTorch
English
opt
deepspeed
chatgpt
sft
Inference Endpoints
text-generation-inference
Adam committed on
Commit
1d60072
1 Parent(s): 8dc4741

feat: adding in sft model

Browse files
Files changed (5) hide show
  1. config.json +32 -0
  2. merges.txt +0 -0
  3. pytorch_model.bin +3 -0
  4. training.log +0 -0
  5. vocab.json +0 -0
config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ {
+   "_name_or_path": "facebook/opt-1.3b",
+   "_remove_final_layer_norm": false,
+   "activation_dropout": 0.0,
+   "activation_function": "relu",
+   "architectures": [
+     "OPTForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 2,
+   "do_layer_norm_before": true,
+   "dropout": 0.0,
+   "enable_bias": true,
+   "end_token_id": 2,
+   "eos_token_id": 2,
+   "ffn_dim": 8192,
+   "hidden_size": 2048,
+   "init_std": 0.02,
+   "layer_norm_elementwise_affine": true,
+   "layerdrop": 0.0,
+   "max_position_embeddings": 2048,
+   "model_type": "opt",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 24,
+   "pad_token_id": 2,
+   "prefix": "</s>",
+   "torch_dtype": "float16",
+   "transformers_version": "4.29.0.dev0",
+   "use_cache": true,
+   "vocab_size": 50272,
+   "word_embed_proj_dim": 2048
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eabca84fb352bba1f3f26c7f7990350cf1e1da46dfee7ec8b4ff64be08949957
+ size 2631585575
training.log ADDED
The diff for this file is too large to render. See raw diff
 
vocab.json ADDED
The diff for this file is too large to render. See raw diff