busycaesar committed on
Commit
3fc9896
·
verified ·
1 Parent(s): b721fa0

RLHF model of StarCoder

Browse files
Files changed (6) hide show
  1. README.md +58 -0
  2. config.json +48 -0
  3. model.safetensors +3 -0
  4. tokenizer.json +0 -0
  5. tokenizer_config.json +34 -0
  6. training_args.bin +3 -0
README.md ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: bigcode/tiny_starcoder_py
3
+ library_name: transformers
4
+ model_name: tinystarcoder-rlhf-model
5
+ tags:
6
+ - generated_from_trainer
7
+ - reward-trainer
8
+ - trl
9
+ licence: license
10
+ ---
11
+
12
+ # Model Card for tinystarcoder-rlhf-model
13
+
14
+ This model is a fine-tuned version of [bigcode/tiny_starcoder_py](https://huggingface.co/bigcode/tiny_starcoder_py).
15
+ It has been trained using [TRL](https://github.com/huggingface/trl).
16
+
17
+ ## Quick start
18
+
19
+ ```python
20
+ from transformers import pipeline
21
+
22
+ text = "The capital of France is Paris."
23
+ rewarder = pipeline(model="busycaesar/tinystarcoder-rlhf-model", device="cuda")
24
+ output = rewarder(text)[0]
25
+ print(output["score"])
26
+ ```
27
+
28
+ ## Training procedure
29
+
30
+
31
+
32
+
33
+
34
+ This model was trained with the Reward Trainer.
35
+
36
+ ### Framework versions
37
+
38
+ - TRL: 1.2.0
39
+ - Transformers: 5.0.0
40
+ - Pytorch: 2.10.0+cu128
41
+ - Datasets: 4.8.4
42
+ - Tokenizers: 0.22.2
43
+
44
+ ## Citations
45
+
46
+
47
+
48
+ Cite TRL as:
49
+
50
+ ```bibtex
51
+ @software{vonwerra2020trl,
52
+ title = {{TRL: Transformers Reinforcement Learning}},
53
+ author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin},
54
+ license = {Apache-2.0},
55
+ url = {https://github.com/huggingface/trl},
56
+ year = {2020}
57
+ }
58
+ ```
config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu_pytorch_tanh",
3
+ "add_cross_attention": false,
4
+ "architectures": [
5
+ "GPTBigCodeForSequenceClassification"
6
+ ],
7
+ "attention_softmax_in_fp32": true,
8
+ "attn_pdrop": 0.1,
9
+ "bos_token_id": 0,
10
+ "dtype": "float32",
11
+ "embd_pdrop": 0.1,
12
+ "eos_token_id": 0,
13
+ "id2label": {
14
+ "0": "LABEL_0"
15
+ },
16
+ "inference_runner": 0,
17
+ "initializer_range": 0.02,
18
+ "label2id": {
19
+ "LABEL_0": 0
20
+ },
21
+ "layer_norm_epsilon": 1e-05,
22
+ "max_batch_size": null,
23
+ "max_sequence_length": null,
24
+ "model_type": "gpt_bigcode",
25
+ "multi_query": true,
26
+ "n_embd": 768,
27
+ "n_head": 12,
28
+ "n_inner": 3072,
29
+ "n_layer": 20,
30
+ "n_positions": 8192,
31
+ "num_key_value_heads": 1,
32
+ "pad_key_length": true,
33
+ "pad_token_id": 0,
34
+ "pre_allocate_kv_cache": false,
35
+ "resid_pdrop": 0.1,
36
+ "scale_attention_softmax_in_fp32": true,
37
+ "scale_attn_weights": true,
38
+ "summary_activation": null,
39
+ "summary_first_dropout": 0.1,
40
+ "summary_proj_to_labels": true,
41
+ "summary_type": "cls_index",
42
+ "summary_use_proj": true,
43
+ "tie_word_embeddings": true,
44
+ "transformers_version": "5.0.0",
45
+ "use_cache": false,
46
+ "validate_runner_input": true,
47
+ "vocab_size": 49152
48
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f141ff198b3458754e6dd0f7b7095fa367cbf25963c0d8a7aa7b073a057adefe
3
+ size 656604448
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": "<|endoftext|>",
5
+ "eos_token": "<|endoftext|>",
6
+ "errors": "replace",
7
+ "extra_special_tokens": [
8
+ "<|endoftext|>",
9
+ "<fim_prefix>",
10
+ "<fim_middle>",
11
+ "<fim_suffix>",
12
+ "<fim_pad>",
13
+ "<filename>",
14
+ "<gh_stars>",
15
+ "<issue_start>",
16
+ "<issue_comment>",
17
+ "<issue_closed>",
18
+ "<jupyter_start>",
19
+ "<jupyter_text>",
20
+ "<jupyter_code>",
21
+ "<jupyter_output>",
22
+ "<empty_output>",
23
+ "<commit_before>",
24
+ "<commit_msg>",
25
+ "<commit_after>",
26
+ "<reponame>"
27
+ ],
28
+ "is_local": false,
29
+ "model_max_length": 1000000000000000019884624838656,
30
+ "pad_token": "<|endoftext|>",
31
+ "tokenizer_class": "GPT2Tokenizer",
32
+ "unk_token": "<|endoftext|>",
33
+ "vocab_size": 49152
34
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a464612f41fd73b15b7f631d8ca3fba8817d72fc198dd74c2dff80f80606279
3
+ size 5393