kejian committed on
Commit bde384a
1 Parent(s): 16ecfc3

update model card README.md

Files changed (1)
  1. README.md +12 -18
README.md CHANGED
@@ -5,7 +5,6 @@ license: mit
 tags:
 - generated_from_trainer
 datasets:
-- tomekkorbak/detoxify-pile-chunk3-0-50000
 - tomekkorbak/detoxify-pile-chunk3-50000-100000
 - tomekkorbak/detoxify-pile-chunk3-100000-150000
 - tomekkorbak/detoxify-pile-chunk3-150000-200000
@@ -42,8 +41,6 @@ datasets:
 - tomekkorbak/detoxify-pile-chunk3-1700000-1750000
 - tomekkorbak/detoxify-pile-chunk3-1750000-1800000
 - tomekkorbak/detoxify-pile-chunk3-1800000-1850000
-- tomekkorbak/detoxify-pile-chunk3-1850000-1900000
-- tomekkorbak/detoxify-pile-chunk3-1900000-1950000
 model-index:
 - name: kejian/cpsc-bincond
   results: []
@@ -54,7 +51,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # kejian/cpsc-bincond
 
-This model was trained from scratch on the tomekkorbak/detoxify-pile-chunk3-0-50000, the tomekkorbak/detoxify-pile-chunk3-50000-100000, the tomekkorbak/detoxify-pile-chunk3-100000-150000, the tomekkorbak/detoxify-pile-chunk3-150000-200000, the tomekkorbak/detoxify-pile-chunk3-200000-250000, the tomekkorbak/detoxify-pile-chunk3-250000-300000, the tomekkorbak/detoxify-pile-chunk3-300000-350000, the tomekkorbak/detoxify-pile-chunk3-350000-400000, the tomekkorbak/detoxify-pile-chunk3-400000-450000, the tomekkorbak/detoxify-pile-chunk3-450000-500000, the tomekkorbak/detoxify-pile-chunk3-500000-550000, the tomekkorbak/detoxify-pile-chunk3-550000-600000, the tomekkorbak/detoxify-pile-chunk3-600000-650000, the tomekkorbak/detoxify-pile-chunk3-650000-700000, the tomekkorbak/detoxify-pile-chunk3-700000-750000, the tomekkorbak/detoxify-pile-chunk3-750000-800000, the tomekkorbak/detoxify-pile-chunk3-800000-850000, the tomekkorbak/detoxify-pile-chunk3-850000-900000, the tomekkorbak/detoxify-pile-chunk3-900000-950000, the tomekkorbak/detoxify-pile-chunk3-950000-1000000, the tomekkorbak/detoxify-pile-chunk3-1000000-1050000, the tomekkorbak/detoxify-pile-chunk3-1050000-1100000, the tomekkorbak/detoxify-pile-chunk3-1100000-1150000, the tomekkorbak/detoxify-pile-chunk3-1150000-1200000, the tomekkorbak/detoxify-pile-chunk3-1200000-1250000, the tomekkorbak/detoxify-pile-chunk3-1250000-1300000, the tomekkorbak/detoxify-pile-chunk3-1300000-1350000, the tomekkorbak/detoxify-pile-chunk3-1350000-1400000, the tomekkorbak/detoxify-pile-chunk3-1400000-1450000, the tomekkorbak/detoxify-pile-chunk3-1450000-1500000, the tomekkorbak/detoxify-pile-chunk3-1500000-1550000, the tomekkorbak/detoxify-pile-chunk3-1550000-1600000, the tomekkorbak/detoxify-pile-chunk3-1600000-1650000, the tomekkorbak/detoxify-pile-chunk3-1650000-1700000, the tomekkorbak/detoxify-pile-chunk3-1700000-1750000, the tomekkorbak/detoxify-pile-chunk3-1750000-1800000, the tomekkorbak/detoxify-pile-chunk3-1800000-1850000, the tomekkorbak/detoxify-pile-chunk3-1850000-1900000 and the tomekkorbak/detoxify-pile-chunk3-1900000-1950000 datasets.
+This model was trained from scratch on the tomekkorbak/detoxify-pile-chunk3-50000-100000, the tomekkorbak/detoxify-pile-chunk3-100000-150000, the tomekkorbak/detoxify-pile-chunk3-150000-200000, the tomekkorbak/detoxify-pile-chunk3-200000-250000, the tomekkorbak/detoxify-pile-chunk3-250000-300000, the tomekkorbak/detoxify-pile-chunk3-300000-350000, the tomekkorbak/detoxify-pile-chunk3-350000-400000, the tomekkorbak/detoxify-pile-chunk3-400000-450000, the tomekkorbak/detoxify-pile-chunk3-450000-500000, the tomekkorbak/detoxify-pile-chunk3-500000-550000, the tomekkorbak/detoxify-pile-chunk3-550000-600000, the tomekkorbak/detoxify-pile-chunk3-600000-650000, the tomekkorbak/detoxify-pile-chunk3-650000-700000, the tomekkorbak/detoxify-pile-chunk3-700000-750000, the tomekkorbak/detoxify-pile-chunk3-750000-800000, the tomekkorbak/detoxify-pile-chunk3-800000-850000, the tomekkorbak/detoxify-pile-chunk3-850000-900000, the tomekkorbak/detoxify-pile-chunk3-900000-950000, the tomekkorbak/detoxify-pile-chunk3-950000-1000000, the tomekkorbak/detoxify-pile-chunk3-1000000-1050000, the tomekkorbak/detoxify-pile-chunk3-1050000-1100000, the tomekkorbak/detoxify-pile-chunk3-1100000-1150000, the tomekkorbak/detoxify-pile-chunk3-1150000-1200000, the tomekkorbak/detoxify-pile-chunk3-1200000-1250000, the tomekkorbak/detoxify-pile-chunk3-1250000-1300000, the tomekkorbak/detoxify-pile-chunk3-1300000-1350000, the tomekkorbak/detoxify-pile-chunk3-1350000-1400000, the tomekkorbak/detoxify-pile-chunk3-1400000-1450000, the tomekkorbak/detoxify-pile-chunk3-1450000-1500000, the tomekkorbak/detoxify-pile-chunk3-1500000-1550000, the tomekkorbak/detoxify-pile-chunk3-1550000-1600000, the tomekkorbak/detoxify-pile-chunk3-1600000-1650000, the tomekkorbak/detoxify-pile-chunk3-1650000-1700000, the tomekkorbak/detoxify-pile-chunk3-1700000-1750000, the tomekkorbak/detoxify-pile-chunk3-1750000-1800000 and the tomekkorbak/detoxify-pile-chunk3-1800000-1850000 datasets.
 
 ## Model description
 
@@ -74,10 +71,10 @@ More information needed
 
 The following hyperparameters were used during training:
 - learning_rate: 0.0005
-- train_batch_size: 8
-- eval_batch_size: 8
+- train_batch_size: 32
+- eval_batch_size: 16
 - seed: 42
-- gradient_accumulation_steps: 8
+- gradient_accumulation_steps: 2
 - total_train_batch_size: 64
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
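The batch-size changes cancel out: with the usual single-process Trainer relationship (total batch = train_batch_size × gradient_accumulation_steps), both configurations give 64, matching the unchanged total_train_batch_size above. A quick check of that arithmetic, using only the values in this diff (the single-process relationship is an assumption):

```python
# Effective batch size before and after the change; assumes the usual
# relation total = train_batch_size * gradient_accumulation_steps.
old_total = 8 * 8    # train_batch_size=8, gradient_accumulation_steps=8
new_total = 32 * 2   # train_batch_size=32, gradient_accumulation_steps=2
assert old_total == new_total == 64
```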
@@ -95,11 +92,10 @@ The following hyperparameters were used during training:
 
 # Full config
 {'dataset': {'conditional_training_config': {'aligned_prefix': '<|aligned|>',
-'drop_token_fraction': 0.03,
+'drop_token_fraction': 0.02,
 'misaligned_prefix': '<|misaligned|>',
 'threshold': 0.0007848},
-'datasets': ['tomekkorbak/detoxify-pile-chunk3-0-50000',
-'tomekkorbak/detoxify-pile-chunk3-50000-100000',
+'datasets': ['tomekkorbak/detoxify-pile-chunk3-50000-100000',
 'tomekkorbak/detoxify-pile-chunk3-100000-150000',
 'tomekkorbak/detoxify-pile-chunk3-150000-200000',
 'tomekkorbak/detoxify-pile-chunk3-200000-250000',
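The conditional_training_config block is what makes this a conditional-training run: each training segment is scored with a toxicity classifier, a control token ('<|aligned|>' or '<|misaligned|>') is prepended depending on whether the score falls below the threshold, and a drop_token_fraction share of segments is left untagged so the model also sees plain text. A minimal sketch of that tagging rule using the values from this diff; prefix_segment and its toxicity_score input are hypothetical names, not the repo's actual API:

```python
import random

def prefix_segment(text: str, toxicity_score: float,
                   threshold: float = 0.0007848,
                   drop_token_fraction: float = 0.02) -> str:
    """Prepend a control token based on a segment's toxicity score.

    Hypothetical sketch of conditional training: segments scoring below
    the threshold are tagged '<|aligned|>', the rest '<|misaligned|>',
    and a small fraction is left untagged entirely.
    """
    if random.random() < drop_token_fraction:
        return text  # no control token for this segment
    tag = '<|aligned|>' if toxicity_score < threshold else '<|misaligned|>'
    return tag + text
```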
@@ -134,9 +130,7 @@ The following hyperparameters were used during training:
 'tomekkorbak/detoxify-pile-chunk3-1650000-1700000',
 'tomekkorbak/detoxify-pile-chunk3-1700000-1750000',
 'tomekkorbak/detoxify-pile-chunk3-1750000-1800000',
-'tomekkorbak/detoxify-pile-chunk3-1800000-1850000',
-'tomekkorbak/detoxify-pile-chunk3-1850000-1900000',
-'tomekkorbak/detoxify-pile-chunk3-1900000-1950000'],
+'tomekkorbak/detoxify-pile-chunk3-1800000-1850000'],
 'is_split_by_sentences': True},
 'generation': {'force_call_on': [21362],
 'metrics_configs': [{}, {'n': 1}, {'n': 2}, {'n': 5}],
@@ -149,7 +143,7 @@ The following hyperparameters were used during training:
 'top_k': 0,
 'top_p': 0.9},
 'name': 'unconditional',
-'num_samples': 2560,
+'num_samples': 2048,
 'prefix': '<|aligned|>'},
 {'generate_kwargs': {'bad_words_ids': [[50257],
 [50258]],
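These generation scenarios sample from the model during evaluation; bad_words_ids [[50257], [50258]] bans the two added control tokens from the continuations, so the scored samples stay in plain text. A hedged sketch of equivalent sampling via the transformers API, assuming the hub checkpoint ships the control tokens in its tokenizer:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("kejian/cpsc-bincond")
model = AutoModelForCausalLM.from_pretrained("kejian/cpsc-bincond")

# Condition on the aligned control token, but ban both control tokens
# (ids 50257 and 50258) from appearing in the sampled continuation.
inputs = tokenizer("<|aligned|>", return_tensors="pt")
out = model.generate(
    **inputs,
    do_sample=True,
    top_k=0,       # top-k filtering disabled, as in the config
    top_p=0.9,     # nucleus sampling, as in the config
    max_new_tokens=128,
    bad_words_ids=[[50257], [50258]],
)
print(tokenizer.decode(out[0][inputs["input_ids"].shape[1]:]))
```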
@@ -168,7 +162,7 @@ The following hyperparameters were used during training:
 'kl_gpt3_callback': {'force_call_on': [21362],
 'gpt3_kwargs': {'model_name': 'davinci'},
 'max_tokens': 64,
-'num_samples': 32,
+'num_samples': 2048,
 'prefix': '<|aligned|>'},
 'model': {'from_scratch': True,
 'gpt2_config_kwargs': {'reorder_and_upcast_attn': True,
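kl_gpt3_callback periodically measures drift from GPT-3 ('davinci'): it draws samples from GPT-3 and compares the two models' log-probabilities on them, a Monte-Carlo estimate of KL(GPT-3 ‖ model); the jump from 32 to 2048 samples presumably tightens that estimate. A schematic of the estimator (the helper and its inputs are hypothetical, not the callback's real interface):

```python
def estimate_kl(gpt3_logprobs: list[float], model_logprobs: list[float]) -> float:
    """Monte-Carlo estimate of KL(p_gpt3 || p_model).

    For samples x ~ p_gpt3, KL ≈ mean of log p_gpt3(x) - log p_model(x).
    Inputs are total log-probabilities per sampled sequence (hypothetical
    interface, not the repo's actual callback).
    """
    diffs = [g - m for g, m in zip(gpt3_logprobs, model_logprobs)]
    return sum(diffs) / len(diffs)
```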
@@ -186,10 +180,10 @@ The following hyperparameters were used during training:
 'hub_strategy': 'all_checkpoints',
 'learning_rate': 0.0005,
 'logging_first_step': True,
-'logging_steps': 500,
+'logging_steps': 50,
 'num_tokens': 2800000000.0,
 'output_dir': 'training_output30',
-'per_device_train_batch_size': 8,
+'per_device_train_batch_size': 16,
 'push_to_hub': True,
 'remove_unused_columns': False,
 'save_steps': 21362,
@@ -199,4 +193,4 @@ The following hyperparameters were used during training:
 'weight_decay': 0.1}}
 
 # Wandb URL:
-https://wandb.ai/kejian/uncategorized/runs/1z69qu1r
+https://wandb.ai/kejian/uncategorized/runs/30tl243y