baek26 committed on
Commit
1e15049
1 Parent(s): 56e8291

Push model using huggingface_hub.

Browse files
Files changed (3) hide show
  1. README.md +27 -55
  2. config.json +46 -72
  3. model.safetensors +2 -2
README.md CHANGED
@@ -1,71 +1,43 @@
1
  ---
2
  license: apache-2.0
3
- base_model: facebook/bart-base
4
  tags:
5
- - generated_from_trainer
6
- metrics:
7
- - rouge
8
- model-index:
9
- - name: billsum_2052_bart-base
10
- results: []
11
  ---
12
 
13
- <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
- should probably proofread and complete it, then remove this comment. -->
15
 
16
- # billsum_2052_bart-base
 
17
 
18
- This model is a fine-tuned version of [facebook/bart-base](https://huggingface.co/facebook/bart-base) on an unknown dataset.
19
- It achieves the following results on the evaluation set:
20
- - Loss: 2.4857
21
- - Rouge1: 0.151
22
- - Rouge2: 0.0596
23
- - Rougel: 0.123
24
- - Rougelsum: 0.1301
25
- - Gen Len: 20.0
26
 
27
- ## Model description
28
 
29
- More information needed
 
 
30
 
31
- ## Intended uses & limitations
32
 
33
- More information needed
 
34
 
35
- ## Training and evaluation data
 
 
36
 
37
- More information needed
38
 
39
- ## Training procedure
 
 
40
 
41
- ### Training hyperparameters
 
42
 
43
- The following hyperparameters were used during training:
44
- - learning_rate: 5e-05
45
- - train_batch_size: 4
46
- - eval_batch_size: 4
47
- - seed: 42
48
- - gradient_accumulation_steps: 16
49
- - total_train_batch_size: 64
50
- - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
51
- - lr_scheduler_type: linear
52
- - lr_scheduler_warmup_steps: 500
53
- - num_epochs: 10
54
-
55
- ### Training results
56
-
57
- | Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum | Gen Len |
58
- |:-------------:|:-----:|:----:|:---------------:|:------:|:------:|:------:|:---------:|:-------:|
59
- | No log | 1.69 | 500 | 2.4571 | 0.1623 | 0.0709 | 0.1353 | 0.1414 | 20.0 |
60
- | No log | 3.38 | 1000 | 2.4533 | 0.1564 | 0.0638 | 0.1273 | 0.1345 | 20.0 |
61
- | No log | 5.07 | 1500 | 2.4592 | 0.149 | 0.0586 | 0.1216 | 0.1287 | 20.0 |
62
- | 1.6068 | 6.75 | 2000 | 2.4967 | 0.1487 | 0.0588 | 0.122 | 0.1286 | 20.0 |
63
- | 1.6068 | 8.44 | 2500 | 2.4857 | 0.151 | 0.0596 | 0.123 | 0.1301 | 20.0 |
64
-
65
-
66
- ### Framework versions
67
-
68
- - Transformers 4.38.2
69
- - Pytorch 2.0.0+cu117
70
- - Datasets 2.18.0
71
- - Tokenizers 0.15.2
 
1
  ---
2
  license: apache-2.0
 
3
  tags:
4
+ - trl
5
+ - ppo
6
+ - transformers
7
+ - reinforcement-learning
 
 
8
  ---
9
 
10
+ # TRL Model
 
11
 
12
+ This is a [TRL language model](https://github.com/huggingface/trl) that has been fine-tuned with reinforcement learning to
13
+ guide the model outputs according to a value, function, or human feedback. The model can be used for text generation.
14
 
15
+ ## Usage
 
 
 
 
 
 
 
16
 
17
+ To use this model for inference, first install the TRL library:
18
 
19
+ ```bash
20
+ python -m pip install trl
21
+ ```
22
 
23
+ You can then generate text as follows:
24
 
25
+ ```python
26
+ from transformers import pipeline
27
 
28
+ generator = pipeline("text-generation", model="baek26/billsum_2052_bart-base")
29
+ outputs = generator("Hello, my llama is cute")
30
+ ```
31
 
32
+ If you want to use the model for training or to obtain the outputs from the value head, load the model as follows:
33
 
34
+ ```python
35
+ from transformers import AutoTokenizer
36
+ from trl import AutoModelForCausalLMWithValueHead
37
 
38
+ tokenizer = AutoTokenizer.from_pretrained("baek26/billsum_2052_bart-base")
39
+ model = AutoModelForCausalLMWithValueHead.from_pretrained("baek26/billsum_2052_bart-base")
40
 
41
+ inputs = tokenizer("Hello, my llama is cute", return_tensors="pt")
42
+ outputs = model(**inputs, labels=inputs["input_ids"])
43
+ ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
config.json CHANGED
@@ -1,75 +1,49 @@
1
  {
2
- "_name_or_path": "facebook/bart-base",
3
- "activation_dropout": 0.1,
4
- "activation_function": "gelu",
5
- "add_bias_logits": false,
6
- "add_final_layer_norm": false,
7
- "architectures": [
8
- "BartForConditionalGeneration"
9
- ],
10
- "attention_dropout": 0.1,
11
- "bos_token_id": 0,
12
- "classif_dropout": 0.1,
13
- "classifier_dropout": 0.0,
14
- "d_model": 768,
15
- "decoder_attention_heads": 12,
16
- "decoder_ffn_dim": 3072,
17
- "decoder_layerdrop": 0.0,
18
- "decoder_layers": 6,
19
- "decoder_start_token_id": 2,
20
- "dropout": 0.1,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  "early_stopping": true,
22
- "encoder_attention_heads": 12,
23
- "encoder_ffn_dim": 3072,
24
- "encoder_layerdrop": 0.0,
25
- "encoder_layers": 6,
26
- "eos_token_id": 2,
27
- "forced_bos_token_id": 0,
28
- "forced_eos_token_id": 2,
29
- "gradient_checkpointing": false,
30
- "id2label": {
31
- "0": "LABEL_0",
32
- "1": "LABEL_1",
33
- "2": "LABEL_2"
34
- },
35
- "init_std": 0.02,
36
  "is_encoder_decoder": true,
37
- "label2id": {
38
- "LABEL_0": 0,
39
- "LABEL_1": 1,
40
- "LABEL_2": 2
41
- },
42
- "max_position_embeddings": 1024,
43
- "model_type": "bart",
44
- "no_repeat_ngram_size": 3,
45
- "normalize_before": false,
46
- "normalize_embedding": true,
47
- "num_beams": 4,
48
- "num_hidden_layers": 6,
49
- "pad_token_id": 1,
50
- "scale_embedding": false,
51
- "task_specific_params": {
52
- "summarization": {
53
- "length_penalty": 1.0,
54
- "max_length": 128,
55
- "min_length": 12,
56
- "num_beams": 4
57
- },
58
- "summarization_cnn": {
59
- "length_penalty": 2.0,
60
- "max_length": 142,
61
- "min_length": 56,
62
- "num_beams": 4
63
- },
64
- "summarization_xsum": {
65
- "length_penalty": 1.0,
66
- "max_length": 62,
67
- "min_length": 11,
68
- "num_beams": 6
69
- }
70
- },
71
- "torch_dtype": "float32",
72
- "transformers_version": "4.38.2",
73
- "use_cache": true,
74
- "vocab_size": 50265
75
- }
 
1
  {
2
+ "exp_name": "rlqaf",
3
+ "seed": 0,
4
+ "log_with": null,
5
+ "task_name": null,
6
+ "model_name": "facebook/bart-base",
7
+ "query_dataset": "imdb",
8
+ "reward_model": "sentiment-analysis:lvwerra/distilbert-imdb",
9
+ "remove_unused_columns": true,
10
+ "tracker_kwargs": {},
11
+ "accelerator_kwargs": {},
12
+ "project_kwargs": {},
13
+ "tracker_project_name": "trl",
14
+ "push_to_hub_if_best_kwargs": {},
15
+ "steps": 20000,
16
+ "learning_rate": 1.41e-06,
17
+ "adap_kl_ctrl": true,
18
+ "init_kl_coef": 0.2,
19
+ "kl_penalty": "kl",
20
+ "target": 6,
21
+ "horizon": 10000,
22
+ "gamma": 0.9,
23
+ "lam": 0.95,
24
+ "cliprange": 0.2,
25
+ "cliprange_value": 0.2,
26
+ "vf_coef": 0.1,
27
+ "batch_size": 1,
28
+ "forward_batch_size": null,
29
+ "mini_batch_size": 1,
30
+ "gradient_accumulation_steps": 1,
31
+ "world_size": 1,
32
+ "ppo_epochs": 4,
33
+ "max_grad_norm": null,
34
+ "optimize_cuda_cache": null,
35
+ "optimize_device_cache": false,
36
  "early_stopping": true,
37
+ "target_kl": 1,
38
+ "compare_steps": 1,
39
+ "ratio_threshold": 10.0,
40
+ "use_score_scaling": false,
41
+ "use_score_norm": false,
42
+ "score_clip": null,
43
+ "whiten_rewards": false,
 
 
 
 
 
 
 
44
  "is_encoder_decoder": true,
45
+ "is_peft_model": false,
46
+ "backward_batch_size": 1,
47
+ "global_backward_batch_size": 1,
48
+ "global_batch_size": 1
49
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58d74adc4ffa047cfba664fde6607ca5589f07c82e741e0c631efca5b77dcc3d
3
- size 557912620
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f4153290468a6d6b34c0eb88831fc5a48e436a1a9f0b5c374e7333cbe11cda3
3
+ size 557915872