kejian committed on
Commit bde384a
1 Parent(s): 16ecfc3

update model card README.md

Files changed (1)
  1. README.md +12 -18
README.md CHANGED
@@ -5,7 +5,6 @@ license: mit
 tags:
 - generated_from_trainer
 datasets:
-- tomekkorbak/detoxify-pile-chunk3-0-50000
 - tomekkorbak/detoxify-pile-chunk3-50000-100000
 - tomekkorbak/detoxify-pile-chunk3-100000-150000
 - tomekkorbak/detoxify-pile-chunk3-150000-200000
@@ -42,8 +41,6 @@ datasets:
 - tomekkorbak/detoxify-pile-chunk3-1700000-1750000
 - tomekkorbak/detoxify-pile-chunk3-1750000-1800000
 - tomekkorbak/detoxify-pile-chunk3-1800000-1850000
-- tomekkorbak/detoxify-pile-chunk3-1850000-1900000
-- tomekkorbak/detoxify-pile-chunk3-1900000-1950000
 model-index:
 - name: kejian/cpsc-bincond
   results: []
@@ -54,7 +51,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # kejian/cpsc-bincond
 
-This model was trained from scratch on the tomekkorbak/detoxify-pile-chunk3-0-50000, the tomekkorbak/detoxify-pile-chunk3-50000-100000, the tomekkorbak/detoxify-pile-chunk3-100000-150000, the tomekkorbak/detoxify-pile-chunk3-150000-200000, the tomekkorbak/detoxify-pile-chunk3-200000-250000, the tomekkorbak/detoxify-pile-chunk3-250000-300000, the tomekkorbak/detoxify-pile-chunk3-300000-350000, the tomekkorbak/detoxify-pile-chunk3-350000-400000, the tomekkorbak/detoxify-pile-chunk3-400000-450000, the tomekkorbak/detoxify-pile-chunk3-450000-500000, the tomekkorbak/detoxify-pile-chunk3-500000-550000, the tomekkorbak/detoxify-pile-chunk3-550000-600000, the tomekkorbak/detoxify-pile-chunk3-600000-650000, the tomekkorbak/detoxify-pile-chunk3-650000-700000, the tomekkorbak/detoxify-pile-chunk3-700000-750000, the tomekkorbak/detoxify-pile-chunk3-750000-800000, the tomekkorbak/detoxify-pile-chunk3-800000-850000, the tomekkorbak/detoxify-pile-chunk3-850000-900000, the tomekkorbak/detoxify-pile-chunk3-900000-950000, the tomekkorbak/detoxify-pile-chunk3-950000-1000000, the tomekkorbak/detoxify-pile-chunk3-1000000-1050000, the tomekkorbak/detoxify-pile-chunk3-1050000-1100000, the tomekkorbak/detoxify-pile-chunk3-1100000-1150000, the tomekkorbak/detoxify-pile-chunk3-1150000-1200000, the tomekkorbak/detoxify-pile-chunk3-1200000-1250000, the tomekkorbak/detoxify-pile-chunk3-1250000-1300000, the tomekkorbak/detoxify-pile-chunk3-1300000-1350000, the tomekkorbak/detoxify-pile-chunk3-1350000-1400000, the tomekkorbak/detoxify-pile-chunk3-1400000-1450000, the tomekkorbak/detoxify-pile-chunk3-1450000-1500000, the tomekkorbak/detoxify-pile-chunk3-1500000-1550000, the tomekkorbak/detoxify-pile-chunk3-1550000-1600000, the tomekkorbak/detoxify-pile-chunk3-1600000-1650000, the tomekkorbak/detoxify-pile-chunk3-1650000-1700000, the tomekkorbak/detoxify-pile-chunk3-1700000-1750000, the tomekkorbak/detoxify-pile-chunk3-1750000-1800000, the tomekkorbak/detoxify-pile-chunk3-1800000-1850000, the tomekkorbak/detoxify-pile-chunk3-1850000-1900000 and the tomekkorbak/detoxify-pile-chunk3-1900000-1950000 datasets.
+This model was trained from scratch on the tomekkorbak/detoxify-pile-chunk3-50000-100000, the tomekkorbak/detoxify-pile-chunk3-100000-150000, the tomekkorbak/detoxify-pile-chunk3-150000-200000, the tomekkorbak/detoxify-pile-chunk3-200000-250000, the tomekkorbak/detoxify-pile-chunk3-250000-300000, the tomekkorbak/detoxify-pile-chunk3-300000-350000, the tomekkorbak/detoxify-pile-chunk3-350000-400000, the tomekkorbak/detoxify-pile-chunk3-400000-450000, the tomekkorbak/detoxify-pile-chunk3-450000-500000, the tomekkorbak/detoxify-pile-chunk3-500000-550000, the tomekkorbak/detoxify-pile-chunk3-550000-600000, the tomekkorbak/detoxify-pile-chunk3-600000-650000, the tomekkorbak/detoxify-pile-chunk3-650000-700000, the tomekkorbak/detoxify-pile-chunk3-700000-750000, the tomekkorbak/detoxify-pile-chunk3-750000-800000, the tomekkorbak/detoxify-pile-chunk3-800000-850000, the tomekkorbak/detoxify-pile-chunk3-850000-900000, the tomekkorbak/detoxify-pile-chunk3-900000-950000, the tomekkorbak/detoxify-pile-chunk3-950000-1000000, the tomekkorbak/detoxify-pile-chunk3-1000000-1050000, the tomekkorbak/detoxify-pile-chunk3-1050000-1100000, the tomekkorbak/detoxify-pile-chunk3-1100000-1150000, the tomekkorbak/detoxify-pile-chunk3-1150000-1200000, the tomekkorbak/detoxify-pile-chunk3-1200000-1250000, the tomekkorbak/detoxify-pile-chunk3-1250000-1300000, the tomekkorbak/detoxify-pile-chunk3-1300000-1350000, the tomekkorbak/detoxify-pile-chunk3-1350000-1400000, the tomekkorbak/detoxify-pile-chunk3-1400000-1450000, the tomekkorbak/detoxify-pile-chunk3-1450000-1500000, the tomekkorbak/detoxify-pile-chunk3-1500000-1550000, the tomekkorbak/detoxify-pile-chunk3-1550000-1600000, the tomekkorbak/detoxify-pile-chunk3-1600000-1650000, the tomekkorbak/detoxify-pile-chunk3-1650000-1700000, the tomekkorbak/detoxify-pile-chunk3-1700000-1750000, the tomekkorbak/detoxify-pile-chunk3-1750000-1800000 and the tomekkorbak/detoxify-pile-chunk3-1800000-1850000 datasets.
 
 ## Model description
 
@@ -74,10 +71,10 @@ More information needed
 
 The following hyperparameters were used during training:
 - learning_rate: 0.0005
-- train_batch_size: 8
-- eval_batch_size: 8
+- train_batch_size: 32
+- eval_batch_size: 16
 - seed: 42
-- gradient_accumulation_steps: 8
+- gradient_accumulation_steps: 2
 - total_train_batch_size: 64
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
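The batch-size changes cancel out: with the usual single-process Trainer relationship (total batch = train_batch_size × gradient_accumulation_steps), both configurations give 64, matching the unchanged total_train_batch_size above. A quick check of that arithmetic, using only the values in this diff (the single-process relationship is an assumption):

```python
# Effective batch size before and after the change; assumes the usual
# relation total = train_batch_size * gradient_accumulation_steps.
old_total = 8 * 8    # train_batch_size=8, gradient_accumulation_steps=8
new_total = 32 * 2   # train_batch_size=32, gradient_accumulation_steps=2
assert old_total == new_total == 64
```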
@@ -95,11 +92,10 @@ The following hyperparameters were used during training:
 
 # Full config
 {'dataset': {'conditional_training_config': {'aligned_prefix': '<|aligned|>',
-'drop_token_fraction': 0.03,
+'drop_token_fraction': 0.02,
 'misaligned_prefix': '<|misaligned|>',
 'threshold': 0.0007848},
-'datasets': ['tomekkorbak/detoxify-pile-chunk3-0-50000',
-'tomekkorbak/detoxify-pile-chunk3-50000-100000',
+'datasets': ['tomekkorbak/detoxify-pile-chunk3-50000-100000',
 'tomekkorbak/detoxify-pile-chunk3-100000-150000',
 'tomekkorbak/detoxify-pile-chunk3-150000-200000',
 'tomekkorbak/detoxify-pile-chunk3-200000-250000',
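The conditional_training_config block is what makes this a conditional-training run: each training segment is scored with a toxicity classifier, a control token ('<|aligned|>' or '<|misaligned|>') is prepended depending on whether the score falls below the threshold, and a drop_token_fraction share of segments is left untagged so the model also sees plain text. A minimal sketch of that tagging rule using the values from this diff; prefix_segment and its toxicity_score input are hypothetical names, not the repo's actual API:

```python
import random

def prefix_segment(text: str, toxicity_score: float,
                   threshold: float = 0.0007848,
                   drop_token_fraction: float = 0.02) -> str:
    """Prepend a control token based on a segment's toxicity score.

    Hypothetical sketch of conditional training: segments scoring below
    the threshold are tagged '<|aligned|>', the rest '<|misaligned|>',
    and a small fraction is left untagged entirely.
    """
    if random.random() < drop_token_fraction:
        return text  # no control token for this segment
    tag = '<|aligned|>' if toxicity_score < threshold else '<|misaligned|>'
    return tag + text
```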
@@ -134,9 +130,7 @@ The following hyperparameters were used during training:
 'tomekkorbak/detoxify-pile-chunk3-1650000-1700000',
 'tomekkorbak/detoxify-pile-chunk3-1700000-1750000',
 'tomekkorbak/detoxify-pile-chunk3-1750000-1800000',
-'tomekkorbak/detoxify-pile-chunk3-1800000-1850000',
-'tomekkorbak/detoxify-pile-chunk3-1850000-1900000',
-'tomekkorbak/detoxify-pile-chunk3-1900000-1950000'],
+'tomekkorbak/detoxify-pile-chunk3-1800000-1850000'],
 'is_split_by_sentences': True},
 'generation': {'force_call_on': [21362],
 'metrics_configs': [{}, {'n': 1}, {'n': 2}, {'n': 5}],
@@ -149,7 +143,7 @@ The following hyperparameters were used during training:
 'top_k': 0,
 'top_p': 0.9},
 'name': 'unconditional',
-'num_samples': 2560,
+'num_samples': 2048,
 'prefix': '<|aligned|>'},
 {'generate_kwargs': {'bad_words_ids': [[50257],
 [50258]],
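These generation scenarios sample from the model during evaluation; bad_words_ids [[50257], [50258]] bans the two added control tokens from the continuations, so the scored samples stay in plain text. A hedged sketch of equivalent sampling via the transformers API, assuming the hub checkpoint ships the control tokens in its tokenizer:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("kejian/cpsc-bincond")
model = AutoModelForCausalLM.from_pretrained("kejian/cpsc-bincond")

# Condition on the aligned control token, but ban both control tokens
# (ids 50257 and 50258) from appearing in the sampled continuation.
inputs = tokenizer("<|aligned|>", return_tensors="pt")
out = model.generate(
    **inputs,
    do_sample=True,
    top_k=0,       # top-k filtering disabled, as in the config
    top_p=0.9,     # nucleus sampling, as in the config
    max_new_tokens=128,
    bad_words_ids=[[50257], [50258]],
)
print(tokenizer.decode(out[0][inputs["input_ids"].shape[1]:]))
```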
@@ -168,7 +162,7 @@ The following hyperparameters were used during training:
 'kl_gpt3_callback': {'force_call_on': [21362],
 'gpt3_kwargs': {'model_name': 'davinci'},
 'max_tokens': 64,
-'num_samples': 32,
+'num_samples': 2048,
 'prefix': '<|aligned|>'},
 'model': {'from_scratch': True,
 'gpt2_config_kwargs': {'reorder_and_upcast_attn': True,
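kl_gpt3_callback periodically measures drift from GPT-3 ('davinci'): it draws samples from GPT-3 and compares the two models' log-probabilities on them, a Monte-Carlo estimate of KL(GPT-3 ‖ model); the jump from 32 to 2048 samples presumably tightens that estimate. A schematic of the estimator (the helper and its inputs are hypothetical, not the callback's real interface):

```python
def estimate_kl(gpt3_logprobs: list[float], model_logprobs: list[float]) -> float:
    """Monte-Carlo estimate of KL(p_gpt3 || p_model).

    For samples x ~ p_gpt3, KL ≈ mean of log p_gpt3(x) - log p_model(x).
    Inputs are total log-probabilities per sampled sequence (hypothetical
    interface, not the repo's actual callback).
    """
    diffs = [g - m for g, m in zip(gpt3_logprobs, model_logprobs)]
    return sum(diffs) / len(diffs)
```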
@@ -186,10 +180,10 @@ The following hyperparameters were used during training:
 'hub_strategy': 'all_checkpoints',
 'learning_rate': 0.0005,
 'logging_first_step': True,
-'logging_steps': 500,
+'logging_steps': 50,
 'num_tokens': 2800000000.0,
 'output_dir': 'training_output30',
-'per_device_train_batch_size': 8,
+'per_device_train_batch_size': 16,
 'push_to_hub': True,
 'remove_unused_columns': False,
 'save_steps': 21362,
@@ -199,4 +193,4 @@ The following hyperparameters were used during training:
 'weight_decay': 0.1}}
 
 # Wandb URL:
-https://wandb.ai/kejian/uncategorized/runs/1z69qu1r
+https://wandb.ai/kejian/uncategorized/runs/30tl243y