Upload folder using huggingface_hub
Browse files- train1/rwkv-0.pth +3 -0
- train1/rwkv-1.pth +3 -0
- train1/train_log.txt +14 -0
train1/rwkv-0.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e25505f8dd53365e3f5391df6023fb62694fb18485439276a9a048eee7c7591c
|
3 |
+
size 1210925837
|
train1/rwkv-1.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dba77c2ca1ef278033280a05c0293dda5de7c25603e26c240abe6ae364c75deb
|
3 |
+
size 1210925837
|
train1/train_log.txt
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
NEW RUN 2023-12-05-05-15-15
|
2 |
+
{'load_model': '/content/RWKV-LM-LoRA/RWKV-v4neo/RWKV-4-7B-world-one-novel-tuned-65k.pth', 'wandb': '', 'proj_dir': '/content/RWKV-LM-LoRA/RWKV-v4neo/lora/', 'random_seed': -1, 'data_file': '/content/RWKV-LM-LoRA/RWKV-v4neo/data_text_document', 'data_type': 'binidx', 'vocab_size': 65536, 'ctx_len': 1024, 'epoch_steps': 5000, 'epoch_count': 1000, 'epoch_begin': 0, 'epoch_save': 1, 'micro_bsz': 2, 'n_layer': 32, 'n_embd': 4096, 'dim_att': 4096, 'dim_ffn': 16384, 'pre_ffn': 0, 'head_qk': 0, 'tiny_att_dim': 0, 'tiny_att_layer': -999, 'lr_init': 5e-05, 'lr_final': 1e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.999, 'adam_eps': 1e-08, 'grad_cp': 0, 'my_pile_stage': 0, 'my_pile_shift': -1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'my_img_version': 0, 'my_img_size': 0, 'my_img_bit': 0, 'my_img_clip': 'x', 'my_img_clip_scale': 1, 'my_img_l1_scale': 0, 'my_img_encoder': 'x', 'my_sample_len': 0, 'my_ffn_shift': 1, 'my_att_shift': 1, 'my_pos_emb': 0, 'load_partial': 0, 'magic_prime': 0, 'my_qa_mask': 0, 'my_testing': '', 'lora': True, 'lora_load': '', 'lora_r': 768, 'lora_alpha': 512.0, 'lora_dropout': 0.01, 'lora_parts': 'att,ln,time', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': True, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': 4, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'deepspeed_stage_2', 'sync_batchnorm': False, 'precision': 'bf16', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2023-12-05-05-15-15', 'betas': (0.9, 0.999), 'real_bsz': 2, 'run_name': '65536 ctx1024 L32 D4096'}
|
3 |
+
{'zero_allow_untested_optimizer': True, 'zero_optimization': {'stage': 2, 'contiguous_gradients': True, 'overlap_comm': True, 'allgather_partitions': True, 'reduce_scatter': True, 'allgather_bucket_size': 200000000, 'reduce_bucket_size': 200000000, 'sub_group_size': 1000000000000}, 'activation_checkpointing': {'partition_activations': False, 'cpu_checkpointing': False, 'contiguous_memory_optimization': False, 'synchronize_checkpoint_boundary': False}, 'aio': {'block_size': 1048576, 'queue_depth': 8, 'single_submit': False, 'overlap_events': True, 'thread_count': 1}, 'gradient_accumulation_steps': 4, 'train_micro_batch_size_per_gpu': 2, 'gradient_clipping': 1.0, 'bf16': {'enabled': True}}
|
4 |
+
NEW RUN 2023-12-05-05-15-15
|
5 |
+
{'load_model': '/content/RWKV-LM-LoRA/RWKV-v4neo/RWKV-4-7B-world-one-novel-tuned-65k.pth', 'wandb': '', 'proj_dir': '/content/RWKV-LM-LoRA/RWKV-v4neo/lora/', 'random_seed': -1, 'data_file': '/content/RWKV-LM-LoRA/RWKV-v4neo/data_text_document', 'data_type': 'binidx', 'vocab_size': 65536, 'ctx_len': 1024, 'epoch_steps': 5000, 'epoch_count': 1000, 'epoch_begin': 0, 'epoch_save': 1, 'micro_bsz': 2, 'n_layer': 32, 'n_embd': 4096, 'dim_att': 4096, 'dim_ffn': 16384, 'pre_ffn': 0, 'head_qk': 0, 'tiny_att_dim': 0, 'tiny_att_layer': -999, 'lr_init': 5e-05, 'lr_final': 1e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.999, 'adam_eps': 1e-08, 'grad_cp': 0, 'my_pile_stage': 0, 'my_pile_shift': -1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'my_img_version': 0, 'my_img_size': 0, 'my_img_bit': 0, 'my_img_clip': 'x', 'my_img_clip_scale': 1, 'my_img_l1_scale': 0, 'my_img_encoder': 'x', 'my_sample_len': 0, 'my_ffn_shift': 1, 'my_att_shift': 1, 'my_pos_emb': 0, 'load_partial': 0, 'magic_prime': 0, 'my_qa_mask': 0, 'my_testing': '', 'lora': True, 'lora_load': '', 'lora_r': 768, 'lora_alpha': 512.0, 'lora_dropout': 0.01, 'lora_parts': 'att,ln,time', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': True, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': 4, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'deepspeed_stage_2', 'sync_batchnorm': False, 'precision': 'bf16', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2023-12-05-05-15-15', 'betas': (0.9, 0.999), 'real_bsz': 2, 'run_name': '65536 ctx1024 L32 D4096'}
|
6 |
+
{'zero_allow_untested_optimizer': True, 'zero_optimization': {'stage': 2, 'contiguous_gradients': True, 'overlap_comm': True, 'allgather_partitions': True, 'reduce_scatter': True, 'allgather_bucket_size': 200000000, 'reduce_bucket_size': 200000000, 'sub_group_size': 1000000000000}, 'activation_checkpointing': {'partition_activations': False, 'cpu_checkpointing': False, 'contiguous_memory_optimization': False, 'synchronize_checkpoint_boundary': False}, 'aio': {'block_size': 1048576, 'queue_depth': 8, 'single_submit': False, 'overlap_events': True, 'thread_count': 1}, 'gradient_accumulation_steps': 4, 'train_micro_batch_size_per_gpu': 2, 'gradient_clipping': 1.0, 'bf16': {'enabled': True}}
|
7 |
+
NEW RUN 2023-12-05-05-15-15
|
8 |
+
{'load_model': '/content/RWKV-LM-LoRA/RWKV-v4neo/RWKV-4-7B-world-one-novel-tuned-65k.pth', 'wandb': '', 'proj_dir': '/content/RWKV-LM-LoRA/RWKV-v4neo/lora/', 'random_seed': -1, 'data_file': '/content/RWKV-LM-LoRA/RWKV-v4neo/data_text_document', 'data_type': 'binidx', 'vocab_size': 65536, 'ctx_len': 1024, 'epoch_steps': 5000, 'epoch_count': 1000, 'epoch_begin': 0, 'epoch_save': 1, 'micro_bsz': 2, 'n_layer': 32, 'n_embd': 4096, 'dim_att': 4096, 'dim_ffn': 16384, 'pre_ffn': 0, 'head_qk': 0, 'tiny_att_dim': 0, 'tiny_att_layer': -999, 'lr_init': 5e-05, 'lr_final': 1e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.999, 'adam_eps': 1e-08, 'grad_cp': 0, 'my_pile_stage': 0, 'my_pile_shift': -1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'my_img_version': 0, 'my_img_size': 0, 'my_img_bit': 0, 'my_img_clip': 'x', 'my_img_clip_scale': 1, 'my_img_l1_scale': 0, 'my_img_encoder': 'x', 'my_sample_len': 0, 'my_ffn_shift': 1, 'my_att_shift': 1, 'my_pos_emb': 0, 'load_partial': 0, 'magic_prime': 0, 'my_qa_mask': 0, 'my_testing': '', 'lora': True, 'lora_load': '', 'lora_r': 768, 'lora_alpha': 512.0, 'lora_dropout': 0.01, 'lora_parts': 'att,ln,time', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': True, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': 4, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'deepspeed_stage_2', 'sync_batchnorm': False, 'precision': 'bf16', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2023-12-05-05-15-15', 'betas': (0.9, 0.999), 'real_bsz': 2, 'run_name': '65536 ctx1024 L32 D4096'}
|
9 |
+
{'zero_allow_untested_optimizer': True, 'zero_optimization': {'stage': 2, 'contiguous_gradients': True, 'overlap_comm': True, 'allgather_partitions': True, 'reduce_scatter': True, 'allgather_bucket_size': 200000000, 'reduce_bucket_size': 200000000, 'sub_group_size': 1000000000000}, 'activation_checkpointing': {'partition_activations': False, 'cpu_checkpointing': False, 'contiguous_memory_optimization': False, 'synchronize_checkpoint_boundary': False}, 'aio': {'block_size': 1048576, 'queue_depth': 8, 'single_submit': False, 'overlap_events': True, 'thread_count': 1}, 'gradient_accumulation_steps': 4, 'train_micro_batch_size_per_gpu': 2, 'gradient_clipping': 1.0, 'bf16': {'enabled': True}}
|
10 |
+
NEW RUN 2023-12-05-05-15-15
|
11 |
+
{'load_model': '/content/RWKV-LM-LoRA/RWKV-v4neo/RWKV-4-7B-world-one-novel-tuned-65k.pth', 'wandb': '', 'proj_dir': '/content/RWKV-LM-LoRA/RWKV-v4neo/lora/', 'random_seed': -1, 'data_file': '/content/RWKV-LM-LoRA/RWKV-v4neo/data_text_document', 'data_type': 'binidx', 'vocab_size': 65536, 'ctx_len': 1024, 'epoch_steps': 5000, 'epoch_count': 1000, 'epoch_begin': 0, 'epoch_save': 1, 'micro_bsz': 2, 'n_layer': 32, 'n_embd': 4096, 'dim_att': 4096, 'dim_ffn': 16384, 'pre_ffn': 0, 'head_qk': 0, 'tiny_att_dim': 0, 'tiny_att_layer': -999, 'lr_init': 5e-05, 'lr_final': 1e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.999, 'adam_eps': 1e-08, 'grad_cp': 0, 'my_pile_stage': 0, 'my_pile_shift': -1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'my_img_version': 0, 'my_img_size': 0, 'my_img_bit': 0, 'my_img_clip': 'x', 'my_img_clip_scale': 1, 'my_img_l1_scale': 0, 'my_img_encoder': 'x', 'my_sample_len': 0, 'my_ffn_shift': 1, 'my_att_shift': 1, 'my_pos_emb': 0, 'load_partial': 0, 'magic_prime': 0, 'my_qa_mask': 0, 'my_testing': '', 'lora': True, 'lora_load': '', 'lora_r': 768, 'lora_alpha': 512.0, 'lora_dropout': 0.01, 'lora_parts': 'att,ln,time', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': True, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': 4, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'deepspeed_stage_2', 'sync_batchnorm': False, 'precision': 'bf16', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2023-12-05-05-15-15', 'betas': (0.9, 0.999), 'real_bsz': 2, 'run_name': '65536 ctx1024 L32 D4096'}
|
12 |
+
{'zero_allow_untested_optimizer': True, 'zero_optimization': {'stage': 2, 'contiguous_gradients': True, 'overlap_comm': True, 'allgather_partitions': True, 'reduce_scatter': True, 'allgather_bucket_size': 200000000, 'reduce_bucket_size': 200000000, 'sub_group_size': 1000000000000}, 'activation_checkpointing': {'partition_activations': False, 'cpu_checkpointing': False, 'contiguous_memory_optimization': False, 'synchronize_checkpoint_boundary': False}, 'aio': {'block_size': 1048576, 'queue_depth': 8, 'single_submit': False, 'overlap_events': True, 'thread_count': 1}, 'gradient_accumulation_steps': 4, 'train_micro_batch_size_per_gpu': 2, 'gradient_clipping': 1.0, 'bf16': {'enabled': True}}
|
13 |
+
0 2.217211 9.1817 0.00004998 2023-12-05 06:03:21.680230 0
|
14 |
+
1 2.177034 8.8201 0.00004996 2023-12-05 06:47:52.540017 1
|