update model card README.md
README.md CHANGED
@@ -5,7 +5,6 @@ license: mit
 tags:
 - generated_from_trainer
 datasets:
-- tomekkorbak/detoxify-pile-chunk3-0-50000
 - tomekkorbak/detoxify-pile-chunk3-50000-100000
 - tomekkorbak/detoxify-pile-chunk3-100000-150000
 - tomekkorbak/detoxify-pile-chunk3-150000-200000
@@ -42,8 +41,6 @@ datasets:
 - tomekkorbak/detoxify-pile-chunk3-1700000-1750000
 - tomekkorbak/detoxify-pile-chunk3-1750000-1800000
 - tomekkorbak/detoxify-pile-chunk3-1800000-1850000
-- tomekkorbak/detoxify-pile-chunk3-1850000-1900000
-- tomekkorbak/detoxify-pile-chunk3-1900000-1950000
 model-index:
 - name: kejian/cpsc-bincond
   results: []
@@ -54,7 +51,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # kejian/cpsc-bincond
 
-This model was trained from scratch on the tomekkorbak/detoxify-pile-chunk3-
+This model was trained from scratch on the tomekkorbak/detoxify-pile-chunk3-50000-100000, the tomekkorbak/detoxify-pile-chunk3-100000-150000, the tomekkorbak/detoxify-pile-chunk3-150000-200000, the tomekkorbak/detoxify-pile-chunk3-200000-250000, the tomekkorbak/detoxify-pile-chunk3-250000-300000, the tomekkorbak/detoxify-pile-chunk3-300000-350000, the tomekkorbak/detoxify-pile-chunk3-350000-400000, the tomekkorbak/detoxify-pile-chunk3-400000-450000, the tomekkorbak/detoxify-pile-chunk3-450000-500000, the tomekkorbak/detoxify-pile-chunk3-500000-550000, the tomekkorbak/detoxify-pile-chunk3-550000-600000, the tomekkorbak/detoxify-pile-chunk3-600000-650000, the tomekkorbak/detoxify-pile-chunk3-650000-700000, the tomekkorbak/detoxify-pile-chunk3-700000-750000, the tomekkorbak/detoxify-pile-chunk3-750000-800000, the tomekkorbak/detoxify-pile-chunk3-800000-850000, the tomekkorbak/detoxify-pile-chunk3-850000-900000, the tomekkorbak/detoxify-pile-chunk3-900000-950000, the tomekkorbak/detoxify-pile-chunk3-950000-1000000, the tomekkorbak/detoxify-pile-chunk3-1000000-1050000, the tomekkorbak/detoxify-pile-chunk3-1050000-1100000, the tomekkorbak/detoxify-pile-chunk3-1100000-1150000, the tomekkorbak/detoxify-pile-chunk3-1150000-1200000, the tomekkorbak/detoxify-pile-chunk3-1200000-1250000, the tomekkorbak/detoxify-pile-chunk3-1250000-1300000, the tomekkorbak/detoxify-pile-chunk3-1300000-1350000, the tomekkorbak/detoxify-pile-chunk3-1350000-1400000, the tomekkorbak/detoxify-pile-chunk3-1400000-1450000, the tomekkorbak/detoxify-pile-chunk3-1450000-1500000, the tomekkorbak/detoxify-pile-chunk3-1500000-1550000, the tomekkorbak/detoxify-pile-chunk3-1550000-1600000, the tomekkorbak/detoxify-pile-chunk3-1600000-1650000, the tomekkorbak/detoxify-pile-chunk3-1650000-1700000, the tomekkorbak/detoxify-pile-chunk3-1700000-1750000, the tomekkorbak/detoxify-pile-chunk3-1750000-1800000 and the tomekkorbak/detoxify-pile-chunk3-1800000-1850000 datasets.
 
 ## Model description
 
@@ -74,10 +71,10 @@ More information needed
 
 The following hyperparameters were used during training:
 - learning_rate: 0.0005
-- train_batch_size:
-- eval_batch_size:
+- train_batch_size: 32
+- eval_batch_size: 16
 - seed: 42
-- gradient_accumulation_steps:
+- gradient_accumulation_steps: 2
 - total_train_batch_size: 64
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
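The filled-in values above are mutually consistent with the full config further down: a per_device_train_batch_size of 16 and gradient_accumulation_steps of 2 reach the reported total_train_batch_size of 64 only with two devices, which the card does not state. A minimal sanity check of that arithmetic, with the device count as an assumption:

```python
# Sanity check of the batch-size arithmetic reported in this card.
per_device_train_batch_size = 16  # from the full config below
num_devices = 2                   # assumption; not stated in the card
gradient_accumulation_steps = 2   # from the hyperparameter list above

train_batch_size = per_device_train_batch_size * num_devices             # 32
total_train_batch_size = train_batch_size * gradient_accumulation_steps  # 64
assert train_batch_size == 32 and total_train_batch_size == 64
```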
@@ -95,11 +92,10 @@ The following hyperparameters were used during training:
 
 # Full config
 {'dataset': {'conditional_training_config': {'aligned_prefix': '<|aligned|>',
-                                             'drop_token_fraction': 0.
+                                             'drop_token_fraction': 0.02,
                                              'misaligned_prefix': '<|misaligned|>',
                                              'threshold': 0.0007848},
-             'datasets': ['tomekkorbak/detoxify-pile-chunk3-
-                          'tomekkorbak/detoxify-pile-chunk3-50000-100000',
+             'datasets': ['tomekkorbak/detoxify-pile-chunk3-50000-100000',
                           'tomekkorbak/detoxify-pile-chunk3-100000-150000',
                           'tomekkorbak/detoxify-pile-chunk3-150000-200000',
                           'tomekkorbak/detoxify-pile-chunk3-200000-250000',
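The conditional_training_config updated above is the core of the setup: training text is tagged with an `<|aligned|>` or `<|misaligned|>` control token depending on whether its toxicity score falls below the 0.0007848 threshold, and the control token is presumably dropped for about 2% of examples ('drop_token_fraction': 0.02) so the model also learns to handle unprefixed text. A minimal sketch of that tagging logic; the helper name and the per-sentence granularity (suggested by 'is_split_by_sentences': True below) are assumptions, not the card's actual implementation:

```python
import random

ALIGNED, MISALIGNED = "<|aligned|>", "<|misaligned|>"
THRESHOLD = 0.0007848       # toxicity-score cutoff from the config
DROP_TOKEN_FRACTION = 0.02  # fraction of examples left unprefixed

def prefix_sentence(sentence: str, toxicity_score: float,
                    rng: random.Random) -> str:
    """Hypothetical helper: prepend a control token based on toxicity."""
    # Occasionally drop the control token, presumably so the model also
    # learns a distribution over unprefixed text.
    if rng.random() < DROP_TOKEN_FRACTION:
        return sentence
    prefix = ALIGNED if toxicity_score < THRESHOLD else MISALIGNED
    return prefix + sentence

rng = random.Random(42)
print(prefix_sentence("The weather was lovely.", 0.0001, rng))
# <|aligned|>The weather was lovely.
```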
@@ -134,9 +130,7 @@ The following hyperparameters were used during training:
                           'tomekkorbak/detoxify-pile-chunk3-1650000-1700000',
                           'tomekkorbak/detoxify-pile-chunk3-1700000-1750000',
                           'tomekkorbak/detoxify-pile-chunk3-1750000-1800000',
-                          'tomekkorbak/detoxify-pile-chunk3-1800000-1850000',
-                          'tomekkorbak/detoxify-pile-chunk3-1850000-1900000',
-                          'tomekkorbak/detoxify-pile-chunk3-1900000-1950000'],
+                          'tomekkorbak/detoxify-pile-chunk3-1800000-1850000'],
              'is_split_by_sentences': True},
 'generation': {'force_call_on': [21362],
                'metrics_configs': [{}, {'n': 1}, {'n': 2}, {'n': 5}],
@@ -149,7 +143,7 @@ The following hyperparameters were used during training:
                                                          'top_k': 0,
                                                          'top_p': 0.9},
                                      'name': 'unconditional',
-                                     'num_samples':
+                                     'num_samples': 2048,
                                      'prefix': '<|aligned|>'},
                                     {'generate_kwargs': {'bad_words_ids': [[50257],
                                                                            [50258]],
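The 'bad_words_ids': [[50257], [50258]] entry bans the two control tokens during sampling. GPT-2's base vocabulary occupies ids 0-50256, so two special tokens appended to it land on ids 50257 and 50258. A sketch of how those ids arise; the tokenizer setup is assumed rather than taken from the card:

```python
from transformers import GPT2Tokenizer

# Assumed setup: the two control tokens appended to GPT-2's vocabulary.
tok = GPT2Tokenizer.from_pretrained("gpt2")
tok.add_special_tokens(
    {"additional_special_tokens": ["<|aligned|>", "<|misaligned|>"]}
)
print(tok.convert_tokens_to_ids(["<|aligned|>", "<|misaligned|>"]))
# [50257, 50258]
```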
@@ -168,7 +162,7 @@ The following hyperparameters were used during training:
 'kl_gpt3_callback': {'force_call_on': [21362],
                      'gpt3_kwargs': {'model_name': 'davinci'},
                      'max_tokens': 64,
-                     'num_samples':
+                     'num_samples': 2048,
                      'prefix': '<|aligned|>'},
 'model': {'from_scratch': True,
           'gpt2_config_kwargs': {'reorder_and_upcast_attn': True,
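The kl_gpt3_callback block suggests that at step 21362 the trainer estimates a KL divergence against GPT-3 davinci from 2048 samples of at most 64 tokens, each prefixed with `<|aligned|>`. A generic Monte Carlo estimator of KL(p || q) from samples drawn from p, as a sketch only; the card does not show the actual callback implementation:

```python
def mc_kl_estimate(log_probs):
    """Estimate KL(p || q) as mean[log p(x) - log q(x)] for x ~ p.

    `log_probs` holds (logp_p, logp_q) pairs: total log-probabilities of
    the same sampled text under p (e.g. GPT-3 davinci) and q (the model
    being trained). Sketch only; not the card's actual callback.
    """
    pairs = list(log_probs)
    return sum(lp - lq for lp, lq in pairs) / len(pairs)

# Toy usage with made-up log-probabilities:
print(mc_kl_estimate([(-42.0, -45.5), (-38.0, -40.0)]))  # 2.75
```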
@@ -186,10 +180,10 @@ The following hyperparameters were used during training:
              'hub_strategy': 'all_checkpoints',
              'learning_rate': 0.0005,
              'logging_first_step': True,
-             'logging_steps':
+             'logging_steps': 50,
              'num_tokens': 2800000000.0,
              'output_dir': 'training_output30',
-             'per_device_train_batch_size':
+             'per_device_train_batch_size': 16,
              'push_to_hub': True,
              'remove_unused_columns': False,
              'save_steps': 21362,
@@ -199,4 +193,4 @@ The following hyperparameters were used during training:
              'weight_decay': 0.1}}
 
 # Wandb URL:
-https://wandb.ai/kejian/uncategorized/runs/
+https://wandb.ai/kejian/uncategorized/runs/30tl243y