bibekyess commited on
Commit
b1a7e7e
1 Parent(s): 78261d2

Training in progress, step 2000

Browse files
Files changed (34) hide show
  1. adapter_config.json +33 -0
  2. adapter_model.safetensors +3 -0
  3. preprocessed_data/augmented_train.csv +0 -0
  4. preprocessed_data/augmented_train_2.csv +0 -0
  5. runs/Mar09_16-47-28_f191c6c9daaa/events.out.tfevents.1710002942.f191c6c9daaa.148.0 +3 -0
  6. runs/Mar09_16-55-33_f191c6c9daaa/events.out.tfevents.1710003341.f191c6c9daaa.12263.0 +3 -0
  7. runs/Mar09_16-58-32_f191c6c9daaa/events.out.tfevents.1710003530.f191c6c9daaa.12263.1 +3 -0
  8. runs/Mar09_17-04-24_f191c6c9daaa/events.out.tfevents.1710003871.f191c6c9daaa.14775.0 +3 -0
  9. runs/Mar10_03-18-22_df99ceffa61d/events.out.tfevents.1710040720.df99ceffa61d.323.0 +3 -0
  10. runs/Mar10_03-19-48_df99ceffa61d/events.out.tfevents.1710040896.df99ceffa61d.323.1 +3 -0
  11. runs/Mar10_03-22-00_df99ceffa61d/events.out.tfevents.1710040949.df99ceffa61d.323.2 +3 -0
  12. runs/Mar10_03-30-49_df99ceffa61d/events.out.tfevents.1710041490.df99ceffa61d.323.3 +3 -0
  13. runs/Mar10_06-48-48_17daf5749447/events.out.tfevents.1710053395.17daf5749447.929.0 +3 -0
  14. special_tokens_map.json +18 -0
  15. tokenizer.json +0 -0
  16. tokenizer_config.json +0 -0
  17. training_args.bin +3 -0
  18. wandb/debug-cli.root.log +0 -0
  19. wandb/debug-internal.log +0 -0
  20. wandb/debug.log +30 -0
  21. wandb/run-20240310_031850-h5a6szhj/files/config.yaml +664 -0
  22. wandb/run-20240310_031850-h5a6szhj/files/output.log +12 -0
  23. wandb/run-20240310_031850-h5a6szhj/files/requirements.txt +500 -0
  24. wandb/run-20240310_031850-h5a6szhj/files/wandb-metadata.json +52 -0
  25. wandb/run-20240310_031850-h5a6szhj/logs/debug.log +77 -0
  26. wandb/run-20240310_031850-h5a6szhj/run-h5a6szhj.wandb +0 -0
  27. wandb/run-20240310_065024-lx2gw13k/files/config.yaml +680 -0
  28. wandb/run-20240310_065024-lx2gw13k/files/output.log +6 -0
  29. wandb/run-20240310_065024-lx2gw13k/files/requirements.txt +500 -0
  30. wandb/run-20240310_065024-lx2gw13k/files/wandb-metadata.json +52 -0
  31. wandb/run-20240310_065024-lx2gw13k/files/wandb-summary.json +1 -0
  32. wandb/run-20240310_065024-lx2gw13k/logs/debug-internal.log +0 -0
  33. wandb/run-20240310_065024-lx2gw13k/logs/debug.log +30 -0
  34. wandb/run-20240310_065024-lx2gw13k/run-lx2gw13k.wandb +0 -0
adapter_config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "Edentns/DataVortexS-10.7B-dpo-v1.11",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layers_pattern": null,
10
+ "layers_to_transform": null,
11
+ "loftq_config": {},
12
+ "lora_alpha": 128,
13
+ "lora_dropout": 0.05,
14
+ "megatron_config": null,
15
+ "megatron_core": "megatron.core",
16
+ "modules_to_save": null,
17
+ "peft_type": "LORA",
18
+ "r": 64,
19
+ "rank_pattern": {},
20
+ "revision": null,
21
+ "target_modules": [
22
+ "o_proj",
23
+ "k_proj",
24
+ "gate_proj",
25
+ "up_proj",
26
+ "v_proj",
27
+ "down_proj",
28
+ "q_proj"
29
+ ],
30
+ "task_type": "CAUSAL_LM",
31
+ "use_dora": false,
32
+ "use_rslora": false
33
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6eb282a193e1395598ece0a26d494255e4a01998e06adeb1111c57000f4f8db1
3
+ size 1006723888
preprocessed_data/augmented_train.csv ADDED
The diff for this file is too large to render. See raw diff
 
preprocessed_data/augmented_train_2.csv ADDED
The diff for this file is too large to render. See raw diff
 
runs/Mar09_16-47-28_f191c6c9daaa/events.out.tfevents.1710002942.f191c6c9daaa.148.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:937a2512af333a158ccb08c5723349dcfda08b039b922fb124780c3ff3565270
3
+ size 88
runs/Mar09_16-55-33_f191c6c9daaa/events.out.tfevents.1710003341.f191c6c9daaa.12263.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d744b4b0b56fcdfe4bf90a2c4041a3a3135370d3d6187e4d0bc449962c93d0cc
3
+ size 5213
runs/Mar09_16-58-32_f191c6c9daaa/events.out.tfevents.1710003530.f191c6c9daaa.12263.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffb18d531e5c5d9ee4ddd498fba2e4c938b8d3a4d6402497a2d51cb3fc28ba8f
3
+ size 88
runs/Mar09_17-04-24_f191c6c9daaa/events.out.tfevents.1710003871.f191c6c9daaa.14775.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb2a206c8f65fe8f03a1aff3b5a5ed8d02eae1076ba66251bdef0035accfe4f8
3
+ size 88
runs/Mar10_03-18-22_df99ceffa61d/events.out.tfevents.1710040720.df99ceffa61d.323.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4db16b9999626263a3ea52181fa87a77b6eb6cffc71d189d819d2f9caaf8e68a
3
+ size 5185
runs/Mar10_03-19-48_df99ceffa61d/events.out.tfevents.1710040896.df99ceffa61d.323.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e02e989db47e4e3a143b302d352f8627bbfe565c1adc808ec88519e333f58abb
3
+ size 5184
runs/Mar10_03-22-00_df99ceffa61d/events.out.tfevents.1710040949.df99ceffa61d.323.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13c99ac5b361e88f30fe3b42328f96db3f72801164acd4eb377ee4b70c11cf91
3
+ size 5185
runs/Mar10_03-30-49_df99ceffa61d/events.out.tfevents.1710041490.df99ceffa61d.323.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9124b73ff612dfa665f4815efd2166611e939ecc900d5e8b1a69be9fbb27976
3
+ size 5185
runs/Mar10_06-48-48_17daf5749447/events.out.tfevents.1710053395.17daf5749447.929.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29822f4369a998315fcd35b76020ee51c77f9a0a59a98c3cfbffd0ce2a481d15
3
+ size 5607
special_tokens_map.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": "###",
10
+ "pad_token": "###",
11
+ "unk_token": {
12
+ "content": "<unk>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false
17
+ }
18
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb1c6cda846927c63f8fb6d781f64a30d892d5d0815a91d93919c2820e6dbf56
3
+ size 4984
wandb/debug-cli.root.log ADDED
File without changes
wandb/debug-internal.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/debug.log ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-03-10 06:50:24,735 INFO MainThread:929 [wandb_setup.py:_flush():76] Current SDK version is 0.16.4
2
+ 2024-03-10 06:50:24,736 INFO MainThread:929 [wandb_setup.py:_flush():76] Configure stats pid to 929
3
+ 2024-03-10 06:50:24,737 INFO MainThread:929 [wandb_setup.py:_flush():76] Loading settings from /root/.config/wandb/settings
4
+ 2024-03-10 06:50:24,737 INFO MainThread:929 [wandb_setup.py:_flush():76] Loading settings from /content/drive/.shortcut-targets-by-id/1VA6x7g-jYQKnbJblLJmHQJesD5-S3Ury/best-one/wandb/settings
5
+ 2024-03-10 06:50:24,737 INFO MainThread:929 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
6
+ 2024-03-10 06:50:24,737 INFO MainThread:929 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
+ 2024-03-10 06:50:24,738 INFO MainThread:929 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program': '<python with no main file>'}
8
+ 2024-03-10 06:50:24,738 INFO MainThread:929 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
9
+ 2024-03-10 06:50:24,739 INFO MainThread:929 [wandb_init.py:_log_setup():526] Logging user logs to /content/drive/.shortcut-targets-by-id/1VA6x7g-jYQKnbJblLJmHQJesD5-S3Ury/best-one/wandb/run-20240310_065024-lx2gw13k/logs/debug.log
10
+ 2024-03-10 06:50:24,740 INFO MainThread:929 [wandb_init.py:_log_setup():527] Logging internal logs to /content/drive/.shortcut-targets-by-id/1VA6x7g-jYQKnbJblLJmHQJesD5-S3Ury/best-one/wandb/run-20240310_065024-lx2gw13k/logs/debug-internal.log
11
+ 2024-03-10 06:50:24,740 INFO MainThread:929 [wandb_init.py:_jupyter_setup():472] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x7aa380ba0220>
12
+ 2024-03-10 06:50:24,741 INFO MainThread:929 [wandb_init.py:init():566] calling init triggers
13
+ 2024-03-10 06:50:24,741 INFO MainThread:929 [wandb_init.py:init():573] wandb.init called with sweep_config: {}
14
+ config: {}
15
+ 2024-03-10 06:50:24,741 INFO MainThread:929 [wandb_init.py:init():616] starting backend
16
+ 2024-03-10 06:50:24,742 INFO MainThread:929 [wandb_init.py:init():620] setting up manager
17
+ 2024-03-10 06:50:24,749 INFO MainThread:929 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
18
+ 2024-03-10 06:50:24,751 INFO MainThread:929 [wandb_init.py:init():628] backend started and connected
19
+ 2024-03-10 06:50:24,833 INFO MainThread:929 [wandb_run.py:_label_probe_notebook():1295] probe notebook
20
+ 2024-03-10 06:50:27,302 INFO MainThread:929 [wandb_init.py:init():720] updated telemetry
21
+ 2024-03-10 06:50:27,312 INFO MainThread:929 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout
22
+ 2024-03-10 06:50:27,741 INFO MainThread:929 [wandb_run.py:_on_init():2262] communicating current version
23
+ 2024-03-10 06:50:27,844 INFO MainThread:929 [wandb_run.py:_on_init():2271] got version response
24
+ 2024-03-10 06:50:27,844 INFO MainThread:929 [wandb_init.py:init():804] starting run threads in backend
25
+ 2024-03-10 06:50:28,339 INFO MainThread:929 [wandb_run.py:_console_start():2241] atexit reg
26
+ 2024-03-10 06:50:28,339 INFO MainThread:929 [wandb_run.py:_redirect():2096] redirect: wrap_raw
27
+ 2024-03-10 06:50:28,340 INFO MainThread:929 [wandb_run.py:_redirect():2161] Wrapping output streams.
28
+ 2024-03-10 06:50:28,340 INFO MainThread:929 [wandb_run.py:_redirect():2186] Redirects installed.
29
+ 2024-03-10 06:50:28,343 INFO MainThread:929 [wandb_init.py:init():847] run started, returning control to user process
30
+ 2024-03-10 06:50:28,351 INFO MainThread:929 [wandb_run.py:_config_callback():1343] config_cb None None {'vocab_size': 48000, 'max_position_embeddings': 4096, 'hidden_size': 4096, 'intermediate_size': 14336, 'num_hidden_layers': 48, 'num_attention_heads': 32, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': True, 'rope_theta': 10000.0, 'rope_scaling': None, 'attention_bias': False, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 2, 'eos_token_id': 32000, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'Edentns/DataVortexS-10.7B-dpo-v1.11', 'transformers_version': '4.38.2', 'model_type': 'llama', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'bnb_4bit_compute_dtype': 'float16', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/content/drive/MyDrive/best-one', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'constant', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/content/drive/MyDrive/best-one/runs/Mar10_06-48-48_17daf5749447', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/content/drive/MyDrive/best-one', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch_fused', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': '/content/drive/MyDrive/best-one/checkpoint-1000', 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
wandb/run-20240310_031850-h5a6szhj/files/config.yaml ADDED
@@ -0,0 +1,664 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ python_version: 3.10.12
7
+ cli_version: 0.16.4
8
+ framework: huggingface
9
+ huggingface_version: 4.38.2
10
+ is_jupyter_run: true
11
+ is_kaggle_kernel: false
12
+ start_time: 1710040730.0
13
+ t:
14
+ 1:
15
+ - 1
16
+ - 2
17
+ - 3
18
+ - 5
19
+ - 11
20
+ - 12
21
+ - 49
22
+ - 51
23
+ - 53
24
+ - 55
25
+ - 71
26
+ - 84
27
+ - 98
28
+ 2:
29
+ - 1
30
+ - 2
31
+ - 3
32
+ - 5
33
+ - 11
34
+ - 12
35
+ - 49
36
+ - 51
37
+ - 53
38
+ - 55
39
+ - 71
40
+ - 84
41
+ - 98
42
+ 3:
43
+ - 7
44
+ - 23
45
+ 4: 3.10.12
46
+ 5: 0.16.4
47
+ 6: 4.38.2
48
+ 8:
49
+ - 1
50
+ - 5
51
+ - 12
52
+ 9:
53
+ 1: transformers_trainer
54
+ 13: linux-x86_64
55
+ m:
56
+ - 1: train/global_step
57
+ 6:
58
+ - 3
59
+ vocab_size:
60
+ desc: null
61
+ value: 48000
62
+ max_position_embeddings:
63
+ desc: null
64
+ value: 4096
65
+ hidden_size:
66
+ desc: null
67
+ value: 4096
68
+ intermediate_size:
69
+ desc: null
70
+ value: 14336
71
+ num_hidden_layers:
72
+ desc: null
73
+ value: 48
74
+ num_attention_heads:
75
+ desc: null
76
+ value: 32
77
+ num_key_value_heads:
78
+ desc: null
79
+ value: 8
80
+ hidden_act:
81
+ desc: null
82
+ value: silu
83
+ initializer_range:
84
+ desc: null
85
+ value: 0.02
86
+ rms_norm_eps:
87
+ desc: null
88
+ value: 1.0e-05
89
+ pretraining_tp:
90
+ desc: null
91
+ value: 1
92
+ use_cache:
93
+ desc: null
94
+ value: true
95
+ rope_theta:
96
+ desc: null
97
+ value: 10000.0
98
+ rope_scaling:
99
+ desc: null
100
+ value: null
101
+ attention_bias:
102
+ desc: null
103
+ value: false
104
+ attention_dropout:
105
+ desc: null
106
+ value: 0.0
107
+ return_dict:
108
+ desc: null
109
+ value: true
110
+ output_hidden_states:
111
+ desc: null
112
+ value: false
113
+ output_attentions:
114
+ desc: null
115
+ value: false
116
+ torchscript:
117
+ desc: null
118
+ value: false
119
+ torch_dtype:
120
+ desc: null
121
+ value: float16
122
+ use_bfloat16:
123
+ desc: null
124
+ value: false
125
+ tf_legacy_loss:
126
+ desc: null
127
+ value: false
128
+ pruned_heads:
129
+ desc: null
130
+ value: {}
131
+ tie_word_embeddings:
132
+ desc: null
133
+ value: false
134
+ chunk_size_feed_forward:
135
+ desc: null
136
+ value: 0
137
+ is_encoder_decoder:
138
+ desc: null
139
+ value: false
140
+ is_decoder:
141
+ desc: null
142
+ value: false
143
+ cross_attention_hidden_size:
144
+ desc: null
145
+ value: null
146
+ add_cross_attention:
147
+ desc: null
148
+ value: false
149
+ tie_encoder_decoder:
150
+ desc: null
151
+ value: false
152
+ max_length:
153
+ desc: null
154
+ value: 20
155
+ min_length:
156
+ desc: null
157
+ value: 0
158
+ do_sample:
159
+ desc: null
160
+ value: false
161
+ early_stopping:
162
+ desc: null
163
+ value: false
164
+ num_beams:
165
+ desc: null
166
+ value: 1
167
+ num_beam_groups:
168
+ desc: null
169
+ value: 1
170
+ diversity_penalty:
171
+ desc: null
172
+ value: 0.0
173
+ temperature:
174
+ desc: null
175
+ value: 1.0
176
+ top_k:
177
+ desc: null
178
+ value: 50
179
+ top_p:
180
+ desc: null
181
+ value: 1.0
182
+ typical_p:
183
+ desc: null
184
+ value: 1.0
185
+ repetition_penalty:
186
+ desc: null
187
+ value: 1.0
188
+ length_penalty:
189
+ desc: null
190
+ value: 1.0
191
+ no_repeat_ngram_size:
192
+ desc: null
193
+ value: 0
194
+ encoder_no_repeat_ngram_size:
195
+ desc: null
196
+ value: 0
197
+ bad_words_ids:
198
+ desc: null
199
+ value: null
200
+ num_return_sequences:
201
+ desc: null
202
+ value: 1
203
+ output_scores:
204
+ desc: null
205
+ value: false
206
+ return_dict_in_generate:
207
+ desc: null
208
+ value: false
209
+ forced_bos_token_id:
210
+ desc: null
211
+ value: null
212
+ forced_eos_token_id:
213
+ desc: null
214
+ value: null
215
+ remove_invalid_values:
216
+ desc: null
217
+ value: false
218
+ exponential_decay_length_penalty:
219
+ desc: null
220
+ value: null
221
+ suppress_tokens:
222
+ desc: null
223
+ value: null
224
+ begin_suppress_tokens:
225
+ desc: null
226
+ value: null
227
+ architectures:
228
+ desc: null
229
+ value:
230
+ - LlamaForCausalLM
231
+ finetuning_task:
232
+ desc: null
233
+ value: null
234
+ id2label:
235
+ desc: null
236
+ value:
237
+ '0': LABEL_0
238
+ '1': LABEL_1
239
+ label2id:
240
+ desc: null
241
+ value:
242
+ LABEL_0: 0
243
+ LABEL_1: 1
244
+ tokenizer_class:
245
+ desc: null
246
+ value: null
247
+ prefix:
248
+ desc: null
249
+ value: null
250
+ bos_token_id:
251
+ desc: null
252
+ value: 1
253
+ pad_token_id:
254
+ desc: null
255
+ value: 2
256
+ eos_token_id:
257
+ desc: null
258
+ value: 32000
259
+ sep_token_id:
260
+ desc: null
261
+ value: null
262
+ decoder_start_token_id:
263
+ desc: null
264
+ value: null
265
+ task_specific_params:
266
+ desc: null
267
+ value: null
268
+ problem_type:
269
+ desc: null
270
+ value: null
271
+ _name_or_path:
272
+ desc: null
273
+ value: Edentns/DataVortexS-10.7B-dpo-v1.11
274
+ transformers_version:
275
+ desc: null
276
+ value: 4.38.2
277
+ model_type:
278
+ desc: null
279
+ value: llama
280
+ quantization_config:
281
+ desc: null
282
+ value:
283
+ quant_method: QuantizationMethod.BITS_AND_BYTES
284
+ _load_in_8bit: false
285
+ _load_in_4bit: true
286
+ llm_int8_threshold: 6.0
287
+ llm_int8_skip_modules: null
288
+ llm_int8_enable_fp32_cpu_offload: false
289
+ llm_int8_has_fp16_weight: false
290
+ bnb_4bit_quant_type: nf4
291
+ bnb_4bit_use_double_quant: true
292
+ bnb_4bit_compute_dtype: float16
293
+ load_in_4bit: true
294
+ load_in_8bit: false
295
+ output_dir:
296
+ desc: null
297
+ value: /content/drive/MyDrive/best-one
298
+ overwrite_output_dir:
299
+ desc: null
300
+ value: false
301
+ do_train:
302
+ desc: null
303
+ value: false
304
+ do_eval:
305
+ desc: null
306
+ value: false
307
+ do_predict:
308
+ desc: null
309
+ value: false
310
+ evaluation_strategy:
311
+ desc: null
312
+ value: 'no'
313
+ prediction_loss_only:
314
+ desc: null
315
+ value: false
316
+ per_device_train_batch_size:
317
+ desc: null
318
+ value: 1
319
+ per_device_eval_batch_size:
320
+ desc: null
321
+ value: 8
322
+ per_gpu_train_batch_size:
323
+ desc: null
324
+ value: null
325
+ per_gpu_eval_batch_size:
326
+ desc: null
327
+ value: null
328
+ gradient_accumulation_steps:
329
+ desc: null
330
+ value: 1
331
+ eval_accumulation_steps:
332
+ desc: null
333
+ value: null
334
+ eval_delay:
335
+ desc: null
336
+ value: 0
337
+ learning_rate:
338
+ desc: null
339
+ value: 0.0002
340
+ weight_decay:
341
+ desc: null
342
+ value: 0.0
343
+ adam_beta1:
344
+ desc: null
345
+ value: 0.9
346
+ adam_beta2:
347
+ desc: null
348
+ value: 0.999
349
+ adam_epsilon:
350
+ desc: null
351
+ value: 1.0e-08
352
+ max_grad_norm:
353
+ desc: null
354
+ value: 0.3
355
+ num_train_epochs:
356
+ desc: null
357
+ value: 4
358
+ max_steps:
359
+ desc: null
360
+ value: -1
361
+ lr_scheduler_type:
362
+ desc: null
363
+ value: constant
364
+ lr_scheduler_kwargs:
365
+ desc: null
366
+ value: {}
367
+ warmup_ratio:
368
+ desc: null
369
+ value: 0.03
370
+ warmup_steps:
371
+ desc: null
372
+ value: 0
373
+ log_level:
374
+ desc: null
375
+ value: passive
376
+ log_level_replica:
377
+ desc: null
378
+ value: warning
379
+ log_on_each_node:
380
+ desc: null
381
+ value: true
382
+ logging_dir:
383
+ desc: null
384
+ value: /content/drive/MyDrive/best-one/runs/Mar10_03-30-49_df99ceffa61d
385
+ logging_strategy:
386
+ desc: null
387
+ value: steps
388
+ logging_first_step:
389
+ desc: null
390
+ value: false
391
+ logging_steps:
392
+ desc: null
393
+ value: 500
394
+ logging_nan_inf_filter:
395
+ desc: null
396
+ value: true
397
+ save_strategy:
398
+ desc: null
399
+ value: steps
400
+ save_steps:
401
+ desc: null
402
+ value: 10
403
+ save_total_limit:
404
+ desc: null
405
+ value: null
406
+ save_safetensors:
407
+ desc: null
408
+ value: true
409
+ save_on_each_node:
410
+ desc: null
411
+ value: false
412
+ save_only_model:
413
+ desc: null
414
+ value: false
415
+ no_cuda:
416
+ desc: null
417
+ value: false
418
+ use_cpu:
419
+ desc: null
420
+ value: false
421
+ use_mps_device:
422
+ desc: null
423
+ value: false
424
+ seed:
425
+ desc: null
426
+ value: 42
427
+ data_seed:
428
+ desc: null
429
+ value: null
430
+ jit_mode_eval:
431
+ desc: null
432
+ value: false
433
+ use_ipex:
434
+ desc: null
435
+ value: false
436
+ bf16:
437
+ desc: null
438
+ value: false
439
+ fp16:
440
+ desc: null
441
+ value: true
442
+ fp16_opt_level:
443
+ desc: null
444
+ value: O1
445
+ half_precision_backend:
446
+ desc: null
447
+ value: auto
448
+ bf16_full_eval:
449
+ desc: null
450
+ value: false
451
+ fp16_full_eval:
452
+ desc: null
453
+ value: false
454
+ tf32:
455
+ desc: null
456
+ value: null
457
+ local_rank:
458
+ desc: null
459
+ value: 0
460
+ ddp_backend:
461
+ desc: null
462
+ value: null
463
+ tpu_num_cores:
464
+ desc: null
465
+ value: null
466
+ tpu_metrics_debug:
467
+ desc: null
468
+ value: false
469
+ debug:
470
+ desc: null
471
+ value: []
472
+ dataloader_drop_last:
473
+ desc: null
474
+ value: false
475
+ eval_steps:
476
+ desc: null
477
+ value: null
478
+ dataloader_num_workers:
479
+ desc: null
480
+ value: 0
481
+ dataloader_prefetch_factor:
482
+ desc: null
483
+ value: null
484
+ past_index:
485
+ desc: null
486
+ value: -1
487
+ run_name:
488
+ desc: null
489
+ value: /content/drive/MyDrive/best-one
490
+ disable_tqdm:
491
+ desc: null
492
+ value: false
493
+ remove_unused_columns:
494
+ desc: null
495
+ value: true
496
+ label_names:
497
+ desc: null
498
+ value: null
499
+ load_best_model_at_end:
500
+ desc: null
501
+ value: false
502
+ metric_for_best_model:
503
+ desc: null
504
+ value: null
505
+ greater_is_better:
506
+ desc: null
507
+ value: null
508
+ ignore_data_skip:
509
+ desc: null
510
+ value: false
511
+ fsdp:
512
+ desc: null
513
+ value: []
514
+ fsdp_min_num_params:
515
+ desc: null
516
+ value: 0
517
+ fsdp_config:
518
+ desc: null
519
+ value:
520
+ min_num_params: 0
521
+ xla: false
522
+ xla_fsdp_v2: false
523
+ xla_fsdp_grad_ckpt: false
524
+ fsdp_transformer_layer_cls_to_wrap:
525
+ desc: null
526
+ value: null
527
+ accelerator_config:
528
+ desc: null
529
+ value:
530
+ split_batches: false
531
+ dispatch_batches: null
532
+ even_batches: true
533
+ use_seedable_sampler: true
534
+ deepspeed:
535
+ desc: null
536
+ value: null
537
+ label_smoothing_factor:
538
+ desc: null
539
+ value: 0.0
540
+ optim:
541
+ desc: null
542
+ value: adamw_torch_fused
543
+ optim_args:
544
+ desc: null
545
+ value: null
546
+ adafactor:
547
+ desc: null
548
+ value: false
549
+ group_by_length:
550
+ desc: null
551
+ value: false
552
+ length_column_name:
553
+ desc: null
554
+ value: length
555
+ report_to:
556
+ desc: null
557
+ value:
558
+ - tensorboard
559
+ - wandb
560
+ ddp_find_unused_parameters:
561
+ desc: null
562
+ value: null
563
+ ddp_bucket_cap_mb:
564
+ desc: null
565
+ value: null
566
+ ddp_broadcast_buffers:
567
+ desc: null
568
+ value: null
569
+ dataloader_pin_memory:
570
+ desc: null
571
+ value: true
572
+ dataloader_persistent_workers:
573
+ desc: null
574
+ value: false
575
+ skip_memory_metrics:
576
+ desc: null
577
+ value: true
578
+ use_legacy_prediction_loop:
579
+ desc: null
580
+ value: false
581
+ push_to_hub:
582
+ desc: null
583
+ value: false
584
+ resume_from_checkpoint:
585
+ desc: null
586
+ value: /content/drive/MyDrive/best-one/checkpoint-1000
587
+ hub_model_id:
588
+ desc: null
589
+ value: null
590
+ hub_strategy:
591
+ desc: null
592
+ value: every_save
593
+ hub_token:
594
+ desc: null
595
+ value: <HUB_TOKEN>
596
+ hub_private_repo:
597
+ desc: null
598
+ value: false
599
+ hub_always_push:
600
+ desc: null
601
+ value: false
602
+ gradient_checkpointing:
603
+ desc: null
604
+ value: true
605
+ gradient_checkpointing_kwargs:
606
+ desc: null
607
+ value: null
608
+ include_inputs_for_metrics:
609
+ desc: null
610
+ value: false
611
+ fp16_backend:
612
+ desc: null
613
+ value: auto
614
+ push_to_hub_model_id:
615
+ desc: null
616
+ value: null
617
+ push_to_hub_organization:
618
+ desc: null
619
+ value: null
620
+ push_to_hub_token:
621
+ desc: null
622
+ value: <PUSH_TO_HUB_TOKEN>
623
+ mp_parameters:
624
+ desc: null
625
+ value: ''
626
+ auto_find_batch_size:
627
+ desc: null
628
+ value: false
629
+ full_determinism:
630
+ desc: null
631
+ value: false
632
+ torchdynamo:
633
+ desc: null
634
+ value: null
635
+ ray_scope:
636
+ desc: null
637
+ value: last
638
+ ddp_timeout:
639
+ desc: null
640
+ value: 1800
641
+ torch_compile:
642
+ desc: null
643
+ value: false
644
+ torch_compile_backend:
645
+ desc: null
646
+ value: null
647
+ torch_compile_mode:
648
+ desc: null
649
+ value: null
650
+ dispatch_batches:
651
+ desc: null
652
+ value: null
653
+ split_batches:
654
+ desc: null
655
+ value: null
656
+ include_tokens_per_second:
657
+ desc: null
658
+ value: false
659
+ include_num_input_tokens_seen:
660
+ desc: null
661
+ value: false
662
+ neftune_noise_alpha:
663
+ desc: null
664
+ value: null
wandb/run-20240310_031850-h5a6szhj/files/output.log ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
2
+ /usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
3
+ warnings.warn(
4
+ /usr/local/lib/python3.10/dist-packages/trl/trainer/utils.py:434: UserWarning: The passed formatting_func has more than one argument. Usually that function should have a single argument `example` which corresponds to the dictionary returned by each element of the dataset. Make sure you know what you are doing.
5
+ warnings.warn(
6
+ /usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
7
+ warnings.warn(
8
+ /usr/local/lib/python3.10/dist-packages/trl/trainer/utils.py:434: UserWarning: The passed formatting_func has more than one argument. Usually that function should have a single argument `example` which corresponds to the dictionary returned by each element of the dataset. Make sure you know what you are doing.
9
+ warnings.warn(
10
+ /usr/local/lib/python3.10/dist-packages/trl/trainer/utils.py:434: UserWarning: The passed formatting_func has more than one argument. Usually that function should have a single argument `example` which corresponds to the dictionary returned by each element of the dataset. Make sure you know what you are doing.
11
+ warnings.warn(
12
+ /usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
wandb/run-20240310_031850-h5a6szhj/files/requirements.txt ADDED
@@ -0,0 +1,500 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Babel==2.14.0
2
+ CacheControl==0.14.0
3
+ Cython==3.0.9
4
+ Flask==2.2.5
5
+ GDAL==3.6.4
6
+ GitPython==3.1.42
7
+ Jinja2==3.1.3
8
+ Markdown==3.5.2
9
+ MarkupSafe==2.1.5
10
+ Pillow==9.4.0
11
+ PyDrive2==1.6.3
12
+ PyDrive==1.3.1
13
+ PyGObject==3.42.1
14
+ PyJWT==2.3.0
15
+ PyOpenGL==3.1.7
16
+ PySocks==1.7.1
17
+ PyWavelets==1.5.0
18
+ PyYAML==6.0.1
19
+ Pygments==2.16.1
20
+ SQLAlchemy==2.0.28
21
+ SecretStorage==3.3.1
22
+ Send2Trash==1.8.2
23
+ Sphinx==5.0.2
24
+ Werkzeug==3.0.1
25
+ absl-py==1.4.0
26
+ accelerate==0.27.2
27
+ aiohttp==3.9.3
28
+ aiosignal==1.3.1
29
+ alabaster==0.7.16
30
+ albumentations==1.3.1
31
+ altair==4.2.2
32
+ annotated-types==0.6.0
33
+ anyio==3.7.1
34
+ appdirs==1.4.4
35
+ argon2-cffi-bindings==21.2.0
36
+ argon2-cffi==23.1.0
37
+ array-record==0.5.0
38
+ arviz==0.15.1
39
+ astropy==5.3.4
40
+ astunparse==1.6.3
41
+ async-timeout==4.0.3
42
+ atpublic==4.0
43
+ attrs==23.2.0
44
+ audioread==3.0.1
45
+ autograd==1.6.2
46
+ backcall==0.2.0
47
+ beautifulsoup4==4.12.3
48
+ bidict==0.23.1
49
+ bigframes==0.22.0
50
+ bitsandbytes==0.43.0
51
+ bleach==6.1.0
52
+ blinker==1.4
53
+ blis==0.7.11
54
+ blosc2==2.0.0
55
+ bokeh==3.3.4
56
+ bqplot==0.12.43
57
+ branca==0.7.1
58
+ build==1.1.1
59
+ cachetools==5.3.3
60
+ catalogue==2.0.10
61
+ certifi==2024.2.2
62
+ cffi==1.16.0
63
+ chardet==5.2.0
64
+ charset-normalizer==3.3.2
65
+ chex==0.1.85
66
+ click-plugins==1.1.1
67
+ click==8.1.7
68
+ cligj==0.7.2
69
+ cloudpathlib==0.16.0
70
+ cloudpickle==2.2.1
71
+ cmake==3.27.9
72
+ cmdstanpy==1.2.1
73
+ colorcet==3.1.0
74
+ colorlover==0.3.0
75
+ colour==0.1.5
76
+ community==1.0.0b1
77
+ confection==0.1.4
78
+ cons==0.4.6
79
+ contextlib2==21.6.0
80
+ contourpy==1.2.0
81
+ cryptography==42.0.5
82
+ cufflinks==0.17.3
83
+ cupy-cuda12x==12.2.0
84
+ cvxopt==1.3.2
85
+ cvxpy==1.3.3
86
+ cycler==0.12.1
87
+ cymem==2.0.8
88
+ dask==2023.8.1
89
+ datascience==0.17.6
90
+ datasets==2.18.0
91
+ db-dtypes==1.2.0
92
+ dbus-python==1.2.18
93
+ debugpy==1.6.6
94
+ decorator==4.4.2
95
+ defusedxml==0.7.1
96
+ dill==0.3.8
97
+ distributed==2023.8.1
98
+ distro==1.7.0
99
+ dlib==19.24.2
100
+ dm-tree==0.1.8
101
+ docker-pycreds==0.4.0
102
+ docstring-parser==0.15
103
+ docutils==0.18.1
104
+ dopamine-rl==4.0.6
105
+ duckdb==0.9.2
106
+ earthengine-api==0.1.392
107
+ easydict==1.13
108
+ ecos==2.0.13
109
+ editdistance==0.6.2
110
+ eerepr==0.0.4
111
+ en-core-web-sm==3.7.1
112
+ entrypoints==0.4
113
+ et-xmlfile==1.1.0
114
+ etils==1.7.0
115
+ etuples==0.3.9
116
+ exceptiongroup==1.2.0
117
+ fastai==2.7.14
118
+ fastcore==1.5.29
119
+ fastdownload==0.0.7
120
+ fastjsonschema==2.19.1
121
+ fastprogress==1.0.3
122
+ fastrlock==0.8.2
123
+ filelock==3.13.1
124
+ fiona==1.9.5
125
+ firebase-admin==5.3.0
126
+ flatbuffers==23.5.26
127
+ flax==0.8.1
128
+ folium==0.14.0
129
+ fonttools==4.49.0
130
+ frozendict==2.4.0
131
+ frozenlist==1.4.1
132
+ fsspec==2023.6.0
133
+ future==0.18.3
134
+ gast==0.5.4
135
+ gcsfs==2023.6.0
136
+ gdown==4.7.3
137
+ geemap==0.32.0
138
+ gensim==4.3.2
139
+ geocoder==1.38.1
140
+ geographiclib==2.0
141
+ geopandas==0.13.2
142
+ geopy==2.3.0
143
+ gin-config==0.5.0
144
+ gitdb==4.0.11
145
+ glob2==0.7
146
+ google-ai-generativelanguage==0.4.0
147
+ google-api-core==2.11.1
148
+ google-api-python-client==2.84.0
149
+ google-auth-httplib2==0.1.1
150
+ google-auth-oauthlib==1.2.0
151
+ google-auth==2.27.0
152
+ google-cloud-aiplatform==1.43.0
153
+ google-cloud-bigquery-connection==1.12.1
154
+ google-cloud-bigquery-storage==2.24.0
155
+ google-cloud-bigquery==3.12.0
156
+ google-cloud-core==2.3.3
157
+ google-cloud-datastore==2.15.2
158
+ google-cloud-firestore==2.11.1
159
+ google-cloud-functions==1.13.3
160
+ google-cloud-iam==2.14.3
161
+ google-cloud-language==2.13.3
162
+ google-cloud-resource-manager==1.12.3
163
+ google-cloud-storage==2.8.0
164
+ google-cloud-translate==3.11.3
165
+ google-colab==1.0.0
166
+ google-crc32c==1.5.0
167
+ google-generativeai==0.3.2
168
+ google-pasta==0.2.0
169
+ google-resumable-media==2.7.0
170
+ google==2.0.3
171
+ googleapis-common-protos==1.62.0
172
+ googledrivedownloader==0.4
173
+ graphviz==0.20.1
174
+ greenlet==3.0.3
175
+ grpc-google-iam-v1==0.13.0
176
+ grpcio-status==1.48.2
177
+ grpcio==1.62.0
178
+ gspread-dataframe==3.3.1
179
+ gspread==3.4.2
180
+ gym-notices==0.0.8
181
+ gym==0.25.2
182
+ h5netcdf==1.3.0
183
+ h5py==3.9.0
184
+ holidays==0.44
185
+ holoviews==1.17.1
186
+ html5lib==1.1
187
+ httpimport==1.3.1
188
+ httplib2==0.22.0
189
+ huggingface-hub==0.20.3
190
+ humanize==4.7.0
191
+ hyperopt==0.2.7
192
+ ibis-framework==7.1.0
193
+ idna==3.6
194
+ imageio-ffmpeg==0.4.9
195
+ imageio==2.31.6
196
+ imagesize==1.4.1
197
+ imbalanced-learn==0.10.1
198
+ imgaug==0.4.0
199
+ importlib-metadata==7.0.1
200
+ importlib_resources==6.1.2
201
+ imutils==0.5.4
202
+ inflect==7.0.0
203
+ iniconfig==2.0.0
204
+ intel-openmp==2023.2.3
205
+ ipyevents==2.0.2
206
+ ipyfilechooser==0.6.0
207
+ ipykernel==5.5.6
208
+ ipyleaflet==0.18.2
209
+ ipython-genutils==0.2.0
210
+ ipython-sql==0.5.0
211
+ ipython==7.34.0
212
+ ipytree==0.2.2
213
+ ipywidgets==7.7.1
214
+ itsdangerous==2.1.2
215
+ jax==0.4.23
216
+ jaxlib==0.4.23+cuda12.cudnn89
217
+ jeepney==0.7.1
218
+ jieba==0.42.1
219
+ joblib==1.3.2
220
+ jsonpickle==3.0.3
221
+ jsonschema-specifications==2023.12.1
222
+ jsonschema==4.19.2
223
+ jupyter-client==6.1.12
224
+ jupyter-console==6.1.0
225
+ jupyter-server==1.24.0
226
+ jupyter_core==5.7.1
227
+ jupyterlab_pygments==0.3.0
228
+ jupyterlab_widgets==3.0.10
229
+ kaggle==1.5.16
230
+ kagglehub==0.2.0
231
+ keras==2.15.0
232
+ keyring==23.5.0
233
+ kiwisolver==1.4.5
234
+ langcodes==3.3.0
235
+ launchpadlib==1.10.16
236
+ lazr.restfulclient==0.14.4
237
+ lazr.uri==1.0.6
238
+ lazy_loader==0.3
239
+ libclang==16.0.6
240
+ librosa==0.10.1
241
+ lightgbm==4.1.0
242
+ linkify-it-py==2.0.3
243
+ llvmlite==0.41.1
244
+ locket==1.0.0
245
+ logical-unification==0.4.6
246
+ lxml==4.9.4
247
+ malloy==2023.1067
248
+ markdown-it-py==3.0.0
249
+ matplotlib-inline==0.1.6
250
+ matplotlib-venn==0.11.10
251
+ matplotlib==3.7.1
252
+ mdit-py-plugins==0.4.0
253
+ mdurl==0.1.2
254
+ miniKanren==1.0.3
255
+ missingno==0.5.2
256
+ mistune==0.8.4
257
+ mizani==0.9.3
258
+ mkl==2023.2.0
259
+ ml-dtypes==0.2.0
260
+ mlxtend==0.22.0
261
+ more-itertools==10.1.0
262
+ moviepy==1.0.3
263
+ mpmath==1.3.0
264
+ msgpack==1.0.8
265
+ multidict==6.0.5
266
+ multipledispatch==1.0.0
267
+ multiprocess==0.70.16
268
+ multitasking==0.0.11
269
+ murmurhash==1.0.10
270
+ music21==9.1.0
271
+ natsort==8.4.0
272
+ nbclassic==1.0.0
273
+ nbclient==0.9.0
274
+ nbconvert==6.5.4
275
+ nbformat==5.9.2
276
+ nest-asyncio==1.6.0
277
+ networkx==3.2.1
278
+ nibabel==4.0.2
279
+ nltk==3.8.1
280
+ notebook==6.5.5
281
+ notebook_shim==0.2.4
282
+ numba==0.58.1
283
+ numexpr==2.9.0
284
+ numpy==1.25.2
285
+ oauth2client==4.1.3
286
+ oauthlib==3.2.2
287
+ opencv-contrib-python==4.8.0.76
288
+ opencv-python-headless==4.9.0.80
289
+ opencv-python==4.8.0.76
290
+ openpyxl==3.1.2
291
+ opt-einsum==3.3.0
292
+ optax==0.1.9
293
+ orbax-checkpoint==0.4.4
294
+ osqp==0.6.2.post8
295
+ packaging==23.2
296
+ pandas-datareader==0.10.0
297
+ pandas-gbq==0.19.2
298
+ pandas-stubs==1.5.3.230304
299
+ pandas==2.2.1
300
+ pandocfilters==1.5.1
301
+ panel==1.3.8
302
+ param==2.0.2
303
+ parso==0.8.3
304
+ parsy==2.1
305
+ partd==1.4.1
306
+ pathlib==1.0.1
307
+ patsy==0.5.6
308
+ peewee==3.17.1
309
+ peft==0.9.0
310
+ pexpect==4.9.0
311
+ pickleshare==0.7.5
312
+ pins==0.8.4
313
+ pip-tools==6.13.0
314
+ pip==23.1.2
315
+ platformdirs==4.2.0
316
+ plotly==5.15.0
317
+ plotnine==0.12.4
318
+ pluggy==1.4.0
319
+ polars==0.20.2
320
+ pooch==1.8.1
321
+ portpicker==1.5.2
322
+ prefetch-generator==1.0.3
323
+ preshed==3.0.9
324
+ prettytable==3.10.0
325
+ proglog==0.1.10
326
+ progressbar2==4.2.0
327
+ prometheus_client==0.20.0
328
+ promise==2.3
329
+ prompt-toolkit==3.0.43
330
+ prophet==1.1.5
331
+ proto-plus==1.23.0
332
+ protobuf==3.20.3
333
+ psutil==5.9.5
334
+ psycopg2==2.9.9
335
+ ptyprocess==0.7.0
336
+ py-cpuinfo==9.0.0
337
+ py4j==0.10.9.7
338
+ pyOpenSSL==24.0.0
339
+ pyarrow-hotfix==0.6
340
+ pyarrow==14.0.2
341
+ pyasn1-modules==0.3.0
342
+ pyasn1==0.5.1
343
+ pycocotools==2.0.7
344
+ pycparser==2.21
345
+ pydantic==2.6.3
346
+ pydantic_core==2.16.3
347
+ pydata-google-auth==1.8.2
348
+ pydot-ng==2.0.0
349
+ pydot==1.4.2
350
+ pydotplus==2.0.2
351
+ pyerfa==2.0.1.1
352
+ pygame==2.5.2
353
+ pymc==5.10.4
354
+ pymystem3==0.2.0
355
+ pyparsing==3.1.1
356
+ pyperclip==1.8.2
357
+ pyproj==3.6.1
358
+ pyproject_hooks==1.0.0
359
+ pyshp==2.3.1
360
+ pytensor==2.18.6
361
+ pytest==7.4.4
362
+ python-apt==0.0.0
363
+ python-box==7.1.1
364
+ python-dateutil==2.8.2
365
+ python-louvain==0.16
366
+ python-slugify==8.0.4
367
+ python-utils==3.8.2
368
+ pytz==2023.4
369
+ pyviz_comms==3.0.1
370
+ pyzmq==23.2.1
371
+ qdldl==0.1.7.post0
372
+ qudida==0.0.4
373
+ ratelim==0.1.6
374
+ referencing==0.33.0
375
+ regex==2023.12.25
376
+ requests-oauthlib==1.3.1
377
+ requests==2.31.0
378
+ requirements-parser==0.5.0
379
+ rich==13.7.1
380
+ rpds-py==0.18.0
381
+ rpy2==3.4.2
382
+ rsa==4.9
383
+ safetensors==0.4.2
384
+ scikit-image==0.19.3
385
+ scikit-learn==1.2.2
386
+ scipy==1.11.4
387
+ scooby==0.9.2
388
+ scs==3.2.4.post1
389
+ seaborn==0.13.1
390
+ sentence-transformers==2.5.1
391
+ sentencepiece==0.1.99
392
+ sentry-sdk==1.41.0
393
+ setproctitle==1.3.3
394
+ setuptools==67.7.2
395
+ shapely==2.0.3
396
+ shtab==1.7.1
397
+ six==1.16.0
398
+ six==1.16.0
399
+ sklearn-pandas==2.2.0
400
+ smart-open==6.4.0
401
+ smmap==5.0.1
402
+ sniffio==1.3.1
403
+ snowballstemmer==2.2.0
404
+ sortedcontainers==2.4.0
405
+ soundfile==0.12.1
406
+ soupsieve==2.5
407
+ soxr==0.3.7
408
+ spacy-legacy==3.0.12
409
+ spacy-loggers==1.0.5
410
+ spacy==3.7.4
411
+ sphinxcontrib-applehelp==1.0.8
412
+ sphinxcontrib-devhelp==1.0.6
413
+ sphinxcontrib-htmlhelp==2.0.5
414
+ sphinxcontrib-jsmath==1.0.1
415
+ sphinxcontrib-qthelp==1.0.7
416
+ sphinxcontrib-serializinghtml==1.1.10
417
+ sqlglot==19.9.0
418
+ sqlparse==0.4.4
419
+ srsly==2.4.8
420
+ stanio==0.3.0
421
+ statsmodels==0.14.1
422
+ sympy==1.12
423
+ tables==3.8.0
424
+ tabulate==0.9.0
425
+ tbb==2021.11.0
426
+ tblib==3.0.0
427
+ tenacity==8.2.3
428
+ tensorboard-data-server==0.7.2
429
+ tensorboard==2.15.2
430
+ tensorflow-datasets==4.9.4
431
+ tensorflow-estimator==2.15.0
432
+ tensorflow-gcs-config==2.15.0
433
+ tensorflow-hub==0.16.1
434
+ tensorflow-io-gcs-filesystem==0.36.0
435
+ tensorflow-metadata==1.14.0
436
+ tensorflow-probability==0.23.0
437
+ tensorflow==2.15.0
438
+ tensorstore==0.1.45
439
+ termcolor==2.4.0
440
+ terminado==0.18.0
441
+ text-unidecode==1.3
442
+ textblob==0.17.1
443
+ tf-keras==2.15.0
444
+ tf-slim==1.1.0
445
+ thinc==8.2.3
446
+ threadpoolctl==3.3.0
447
+ tifffile==2024.2.12
448
+ tinycss2==1.2.1
449
+ tokenizers==0.15.2
450
+ toml==0.10.2
451
+ tomli==2.0.1
452
+ toolz==0.12.1
453
+ torch==2.1.0+cu121
454
+ torchaudio==2.1.0+cu121
455
+ torchdata==0.7.0
456
+ torchsummary==1.5.1
457
+ torchtext==0.16.0
458
+ torchvision==0.16.0+cu121
459
+ tornado==6.3.3
460
+ tqdm==4.66.2
461
+ traitlets==5.7.1
462
+ traittypes==0.2.1
463
+ transformers==4.38.2
464
+ triton==2.1.0
465
+ trl==0.7.11
466
+ tweepy==4.14.0
467
+ typer==0.9.0
468
+ types-pytz==2024.1.0.20240203
469
+ types-setuptools==69.1.0.20240302
470
+ typing_extensions==4.10.0
471
+ tyro==0.7.3
472
+ tzdata==2024.1
473
+ tzlocal==5.2
474
+ uc-micro-py==1.0.3
475
+ uritemplate==4.1.1
476
+ urllib3==2.0.7
477
+ vega-datasets==0.9.0
478
+ wadllib==1.3.6
479
+ wandb==0.16.4
480
+ wasabi==1.1.2
481
+ wcwidth==0.2.13
482
+ weasel==0.3.4
483
+ webcolors==1.13
484
+ webencodings==0.5.1
485
+ websocket-client==1.7.0
486
+ wheel==0.42.0
487
+ widgetsnbextension==3.6.6
488
+ wordcloud==1.9.3
489
+ wrapt==1.14.1
490
+ xarray-einstats==0.7.0
491
+ xarray==2023.7.0
492
+ xgboost==2.0.3
493
+ xlrd==2.0.1
494
+ xxhash==3.4.1
495
+ xyzservices==2023.10.1
496
+ yarl==1.9.4
497
+ yellowbrick==1.5
498
+ yfinance==0.2.37
499
+ zict==3.0.0
500
+ zipp==3.17.0
wandb/run-20240310_031850-h5a6szhj/files/wandb-metadata.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.1.58+-x86_64-with-glibc2.35",
3
+ "python": "3.10.12",
4
+ "heartbeatAt": "2024-03-10T03:18:54.772132",
5
+ "startedAt": "2024-03-10T03:18:50.350319",
6
+ "docker": null,
7
+ "cuda": null,
8
+ "args": [],
9
+ "state": "running",
10
+ "program": "Copy%20of%20Solar-train-QLoRA.ipynb",
11
+ "codePathLocal": null,
12
+ "colab": "https://colab.research.google.com/notebook#fileId=1cntH6JMHtnqGybNA0Y55Jk1U_HRTWn3M",
13
+ "host": "df99ceffa61d",
14
+ "username": "root",
15
+ "executable": "/usr/bin/python3",
16
+ "cpu_count": 1,
17
+ "cpu_count_logical": 2,
18
+ "cpu_freq": {
19
+ "current": 2199.998,
20
+ "min": 0.0,
21
+ "max": 0.0
22
+ },
23
+ "cpu_freq_per_core": [
24
+ {
25
+ "current": 2199.998,
26
+ "min": 0.0,
27
+ "max": 0.0
28
+ },
29
+ {
30
+ "current": 2199.998,
31
+ "min": 0.0,
32
+ "max": 0.0
33
+ }
34
+ ],
35
+ "disk": {
36
+ "/": {
37
+ "total": 78.1898422241211,
38
+ "used": 47.104637145996094
39
+ }
40
+ },
41
+ "gpu": "Tesla T4",
42
+ "gpu_count": 1,
43
+ "gpu_devices": [
44
+ {
45
+ "name": "Tesla T4",
46
+ "memory_total": 16106127360
47
+ }
48
+ ],
49
+ "memory": {
50
+ "total": 12.674781799316406
51
+ }
52
+ }
wandb/run-20240310_031850-h5a6szhj/logs/debug.log ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-03-10 03:18:50,408 INFO MainThread:323 [wandb_setup.py:_flush():76] Current SDK version is 0.16.4
2
+ 2024-03-10 03:18:50,410 INFO MainThread:323 [wandb_setup.py:_flush():76] Configure stats pid to 323
3
+ 2024-03-10 03:18:50,411 INFO MainThread:323 [wandb_setup.py:_flush():76] Loading settings from /root/.config/wandb/settings
4
+ 2024-03-10 03:18:50,411 INFO MainThread:323 [wandb_setup.py:_flush():76] Loading settings from /content/drive/.shortcut-targets-by-id/1VA6x7g-jYQKnbJblLJmHQJesD5-S3Ury/best-one/wandb/settings
5
+ 2024-03-10 03:18:50,411 INFO MainThread:323 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
6
+ 2024-03-10 03:18:50,412 INFO MainThread:323 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
+ 2024-03-10 03:18:50,412 INFO MainThread:323 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program': '<python with no main file>'}
8
+ 2024-03-10 03:18:50,412 INFO MainThread:323 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
9
+ 2024-03-10 03:18:50,413 INFO MainThread:323 [wandb_init.py:_log_setup():526] Logging user logs to /content/drive/.shortcut-targets-by-id/1VA6x7g-jYQKnbJblLJmHQJesD5-S3Ury/best-one/wandb/run-20240310_031850-h5a6szhj/logs/debug.log
10
+ 2024-03-10 03:18:50,414 INFO MainThread:323 [wandb_init.py:_log_setup():527] Logging internal logs to /content/drive/.shortcut-targets-by-id/1VA6x7g-jYQKnbJblLJmHQJesD5-S3Ury/best-one/wandb/run-20240310_031850-h5a6szhj/logs/debug-internal.log
11
+ 2024-03-10 03:18:50,414 INFO MainThread:323 [wandb_init.py:_jupyter_setup():472] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x794c78f99780>
12
+ 2024-03-10 03:18:50,415 INFO MainThread:323 [wandb_init.py:init():566] calling init triggers
13
+ 2024-03-10 03:18:50,415 INFO MainThread:323 [wandb_init.py:init():573] wandb.init called with sweep_config: {}
14
+ config: {}
15
+ 2024-03-10 03:18:50,416 INFO MainThread:323 [wandb_init.py:init():616] starting backend
16
+ 2024-03-10 03:18:50,416 INFO MainThread:323 [wandb_init.py:init():620] setting up manager
17
+ 2024-03-10 03:18:50,424 INFO MainThread:323 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
18
+ 2024-03-10 03:18:50,430 INFO MainThread:323 [wandb_init.py:init():628] backend started and connected
19
+ 2024-03-10 03:18:50,467 INFO MainThread:323 [wandb_run.py:_label_probe_notebook():1295] probe notebook
20
+ 2024-03-10 03:18:53,361 INFO MainThread:323 [wandb_init.py:init():720] updated telemetry
21
+ 2024-03-10 03:18:53,375 INFO MainThread:323 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout
22
+ 2024-03-10 03:18:54,216 INFO MainThread:323 [wandb_run.py:_on_init():2262] communicating current version
23
+ 2024-03-10 03:18:54,625 INFO MainThread:323 [wandb_run.py:_on_init():2271] got version response
24
+ 2024-03-10 03:18:54,625 INFO MainThread:323 [wandb_init.py:init():804] starting run threads in backend
25
+ 2024-03-10 03:18:56,342 INFO MainThread:323 [wandb_run.py:_console_start():2241] atexit reg
26
+ 2024-03-10 03:18:56,342 INFO MainThread:323 [wandb_run.py:_redirect():2096] redirect: wrap_raw
27
+ 2024-03-10 03:18:56,342 INFO MainThread:323 [wandb_run.py:_redirect():2161] Wrapping output streams.
28
+ 2024-03-10 03:18:56,343 INFO MainThread:323 [wandb_run.py:_redirect():2186] Redirects installed.
29
+ 2024-03-10 03:18:56,345 INFO MainThread:323 [wandb_init.py:init():847] run started, returning control to user process
30
+ 2024-03-10 03:18:56,358 INFO MainThread:323 [wandb_run.py:_config_callback():1343] config_cb None None {'vocab_size': 48000, 'max_position_embeddings': 4096, 'hidden_size': 4096, 'intermediate_size': 14336, 'num_hidden_layers': 48, 'num_attention_heads': 32, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': True, 'rope_theta': 10000.0, 'rope_scaling': None, 'attention_bias': False, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 2, 'eos_token_id': 32000, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'Edentns/DataVortexS-10.7B-dpo-v1.11', 'transformers_version': '4.38.2', 'model_type': 'llama', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'bnb_4bit_compute_dtype': 'float16', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/content/drive/MyDrive/best-one', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'constant', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/content/drive/MyDrive/best-one/runs/Mar10_03-18-22_df99ceffa61d', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 50, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/content/drive/MyDrive/best-one', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch_fused', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': '/content/drive/MyDrive/best-one/checkpoint-1000', 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
31
+ 2024-03-10 03:19:29,364 INFO MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
32
+ 2024-03-10 03:19:29,364 INFO MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
33
+ 2024-03-10 03:19:48,135 INFO MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
34
+ 2024-03-10 03:19:48,145 INFO MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
35
+ 2024-03-10 03:19:48,145 INFO MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
36
+ 2024-03-10 03:20:26,801 INFO MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
37
+ 2024-03-10 03:20:28,702 INFO MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
38
+ 2024-03-10 03:20:28,702 INFO MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
39
+ 2024-03-10 03:20:30,823 INFO MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
40
+ 2024-03-10 03:21:36,022 INFO MainThread:323 [wandb_run.py:_config_callback():1343] config_cb None None {'vocab_size': 48000, 'max_position_embeddings': 4096, 'hidden_size': 4096, 'intermediate_size': 14336, 'num_hidden_layers': 48, 'num_attention_heads': 32, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': True, 'rope_theta': 10000.0, 'rope_scaling': None, 'attention_bias': False, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 2, 'eos_token_id': 32000, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'Edentns/DataVortexS-10.7B-dpo-v1.11', 'transformers_version': '4.38.2', 'model_type': 'llama', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'bnb_4bit_compute_dtype': 'float16', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/content/drive/MyDrive/best-one', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'constant', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/content/drive/MyDrive/best-one/runs/Mar10_03-19-48_df99ceffa61d', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 50, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/content/drive/MyDrive/best-one', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch_fused', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': '/content/drive/MyDrive/best-one/checkpoint-500', 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
41
+ 2024-03-10 03:21:51,269 INFO MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
42
+ 2024-03-10 03:21:51,269 INFO MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
43
+ 2024-03-10 03:22:00,243 INFO MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
44
+ 2024-03-10 03:22:00,256 INFO MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
45
+ 2024-03-10 03:22:00,256 INFO MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
46
+ 2024-03-10 03:22:03,314 INFO MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
47
+ 2024-03-10 03:22:03,457 INFO MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
48
+ 2024-03-10 03:22:03,457 INFO MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
49
+ 2024-03-10 03:22:12,817 INFO MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
50
+ 2024-03-10 03:22:29,320 INFO MainThread:323 [wandb_run.py:_config_callback():1343] config_cb None None {'vocab_size': 48000, 'max_position_embeddings': 4096, 'hidden_size': 4096, 'intermediate_size': 14336, 'num_hidden_layers': 48, 'num_attention_heads': 32, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': True, 'rope_theta': 10000.0, 'rope_scaling': None, 'attention_bias': False, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 2, 'eos_token_id': 32000, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'Edentns/DataVortexS-10.7B-dpo-v1.11', 'transformers_version': '4.38.2', 'model_type': 'llama', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'bnb_4bit_compute_dtype': 'float16', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/content/drive/MyDrive/best-one', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'constant', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/content/drive/MyDrive/best-one/runs/Mar10_03-22-00_df99ceffa61d', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 50, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/content/drive/MyDrive/best-one', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch_fused', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': '/content/drive/MyDrive/best-one/checkpoint-1000', 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
51
+ 2024-03-10 03:30:10,462 INFO MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
52
+ 2024-03-10 03:30:10,462 INFO MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
53
+ 2024-03-10 03:30:16,433 INFO MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
54
+ 2024-03-10 03:30:16,515 INFO MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
55
+ 2024-03-10 03:30:16,515 INFO MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
56
+ 2024-03-10 03:30:49,356 INFO MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
57
+ 2024-03-10 03:30:49,411 INFO MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
58
+ 2024-03-10 03:30:49,411 INFO MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
59
+ 2024-03-10 03:30:59,671 INFO MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
60
+ 2024-03-10 03:30:59,809 INFO MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
61
+ 2024-03-10 03:30:59,810 INFO MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
62
+ 2024-03-10 03:31:01,547 INFO MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
63
+ 2024-03-10 03:31:01,596 INFO MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
64
+ 2024-03-10 03:31:01,598 INFO MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
65
+ 2024-03-10 03:31:18,837 INFO MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
66
+ 2024-03-10 03:31:18,886 INFO MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
67
+ 2024-03-10 03:31:18,886 INFO MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
68
+ 2024-03-10 03:31:26,809 INFO MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
69
+ 2024-03-10 03:31:30,144 INFO MainThread:323 [wandb_run.py:_config_callback():1343] config_cb None None {'vocab_size': 48000, 'max_position_embeddings': 4096, 'hidden_size': 4096, 'intermediate_size': 14336, 'num_hidden_layers': 48, 'num_attention_heads': 32, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': True, 'rope_theta': 10000.0, 'rope_scaling': None, 'attention_bias': False, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 2, 'eos_token_id': 32000, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'Edentns/DataVortexS-10.7B-dpo-v1.11', 'transformers_version': '4.38.2', 'model_type': 'llama', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'bnb_4bit_compute_dtype': 'float16', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/content/drive/MyDrive/best-one', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'constant', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/content/drive/MyDrive/best-one/runs/Mar10_03-30-49_df99ceffa61d', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 10, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/content/drive/MyDrive/best-one', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch_fused', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': '/content/drive/MyDrive/best-one/checkpoint-1000', 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
70
+ 2024-03-10 03:36:13,219 INFO MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
71
+ 2024-03-10 03:36:13,220 INFO MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
72
+ 2024-03-10 03:36:16,498 INFO MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
73
+ 2024-03-10 03:36:25,544 INFO MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
74
+ 2024-03-10 03:36:25,545 INFO MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
75
+ 2024-03-10 03:37:27,886 INFO MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
76
+ 2024-03-10 03:37:34,416 INFO MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
77
+ 2024-03-10 03:37:34,416 INFO MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
wandb/run-20240310_031850-h5a6szhj/run-h5a6szhj.wandb ADDED
Binary file (32.9 kB). View file
 
wandb/run-20240310_065024-lx2gw13k/files/config.yaml ADDED
@@ -0,0 +1,680 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ python_version: 3.10.12
7
+ cli_version: 0.16.4
8
+ framework: huggingface
9
+ huggingface_version: 4.38.2
10
+ is_jupyter_run: true
11
+ is_kaggle_kernel: false
12
+ start_time: 1710053424.0
13
+ t:
14
+ 1:
15
+ - 1
16
+ - 2
17
+ - 3
18
+ - 5
19
+ - 11
20
+ - 12
21
+ - 49
22
+ - 51
23
+ - 53
24
+ - 55
25
+ - 71
26
+ - 84
27
+ - 98
28
+ 2:
29
+ - 1
30
+ - 2
31
+ - 3
32
+ - 5
33
+ - 11
34
+ - 12
35
+ - 49
36
+ - 51
37
+ - 53
38
+ - 55
39
+ - 71
40
+ - 84
41
+ - 98
42
+ 3:
43
+ - 7
44
+ - 23
45
+ 4: 3.10.12
46
+ 5: 0.16.4
47
+ 6: 4.38.2
48
+ 8:
49
+ - 1
50
+ - 5
51
+ - 12
52
+ 9:
53
+ 1: transformers_trainer
54
+ 13: linux-x86_64
55
+ m:
56
+ - 1: train/global_step
57
+ 6:
58
+ - 3
59
+ - 1: train/loss
60
+ 5: 1
61
+ 6:
62
+ - 1
63
+ - 1: train/grad_norm
64
+ 5: 1
65
+ 6:
66
+ - 1
67
+ - 1: train/learning_rate
68
+ 5: 1
69
+ 6:
70
+ - 1
71
+ - 1: train/epoch
72
+ 5: 1
73
+ 6:
74
+ - 1
75
+ vocab_size:
76
+ desc: null
77
+ value: 48000
78
+ max_position_embeddings:
79
+ desc: null
80
+ value: 4096
81
+ hidden_size:
82
+ desc: null
83
+ value: 4096
84
+ intermediate_size:
85
+ desc: null
86
+ value: 14336
87
+ num_hidden_layers:
88
+ desc: null
89
+ value: 48
90
+ num_attention_heads:
91
+ desc: null
92
+ value: 32
93
+ num_key_value_heads:
94
+ desc: null
95
+ value: 8
96
+ hidden_act:
97
+ desc: null
98
+ value: silu
99
+ initializer_range:
100
+ desc: null
101
+ value: 0.02
102
+ rms_norm_eps:
103
+ desc: null
104
+ value: 1.0e-05
105
+ pretraining_tp:
106
+ desc: null
107
+ value: 1
108
+ use_cache:
109
+ desc: null
110
+ value: true
111
+ rope_theta:
112
+ desc: null
113
+ value: 10000.0
114
+ rope_scaling:
115
+ desc: null
116
+ value: null
117
+ attention_bias:
118
+ desc: null
119
+ value: false
120
+ attention_dropout:
121
+ desc: null
122
+ value: 0.0
123
+ return_dict:
124
+ desc: null
125
+ value: true
126
+ output_hidden_states:
127
+ desc: null
128
+ value: false
129
+ output_attentions:
130
+ desc: null
131
+ value: false
132
+ torchscript:
133
+ desc: null
134
+ value: false
135
+ torch_dtype:
136
+ desc: null
137
+ value: float16
138
+ use_bfloat16:
139
+ desc: null
140
+ value: false
141
+ tf_legacy_loss:
142
+ desc: null
143
+ value: false
144
+ pruned_heads:
145
+ desc: null
146
+ value: {}
147
+ tie_word_embeddings:
148
+ desc: null
149
+ value: false
150
+ chunk_size_feed_forward:
151
+ desc: null
152
+ value: 0
153
+ is_encoder_decoder:
154
+ desc: null
155
+ value: false
156
+ is_decoder:
157
+ desc: null
158
+ value: false
159
+ cross_attention_hidden_size:
160
+ desc: null
161
+ value: null
162
+ add_cross_attention:
163
+ desc: null
164
+ value: false
165
+ tie_encoder_decoder:
166
+ desc: null
167
+ value: false
168
+ max_length:
169
+ desc: null
170
+ value: 20
171
+ min_length:
172
+ desc: null
173
+ value: 0
174
+ do_sample:
175
+ desc: null
176
+ value: false
177
+ early_stopping:
178
+ desc: null
179
+ value: false
180
+ num_beams:
181
+ desc: null
182
+ value: 1
183
+ num_beam_groups:
184
+ desc: null
185
+ value: 1
186
+ diversity_penalty:
187
+ desc: null
188
+ value: 0.0
189
+ temperature:
190
+ desc: null
191
+ value: 1.0
192
+ top_k:
193
+ desc: null
194
+ value: 50
195
+ top_p:
196
+ desc: null
197
+ value: 1.0
198
+ typical_p:
199
+ desc: null
200
+ value: 1.0
201
+ repetition_penalty:
202
+ desc: null
203
+ value: 1.0
204
+ length_penalty:
205
+ desc: null
206
+ value: 1.0
207
+ no_repeat_ngram_size:
208
+ desc: null
209
+ value: 0
210
+ encoder_no_repeat_ngram_size:
211
+ desc: null
212
+ value: 0
213
+ bad_words_ids:
214
+ desc: null
215
+ value: null
216
+ num_return_sequences:
217
+ desc: null
218
+ value: 1
219
+ output_scores:
220
+ desc: null
221
+ value: false
222
+ return_dict_in_generate:
223
+ desc: null
224
+ value: false
225
+ forced_bos_token_id:
226
+ desc: null
227
+ value: null
228
+ forced_eos_token_id:
229
+ desc: null
230
+ value: null
231
+ remove_invalid_values:
232
+ desc: null
233
+ value: false
234
+ exponential_decay_length_penalty:
235
+ desc: null
236
+ value: null
237
+ suppress_tokens:
238
+ desc: null
239
+ value: null
240
+ begin_suppress_tokens:
241
+ desc: null
242
+ value: null
243
+ architectures:
244
+ desc: null
245
+ value:
246
+ - LlamaForCausalLM
247
+ finetuning_task:
248
+ desc: null
249
+ value: null
250
+ id2label:
251
+ desc: null
252
+ value:
253
+ '0': LABEL_0
254
+ '1': LABEL_1
255
+ label2id:
256
+ desc: null
257
+ value:
258
+ LABEL_0: 0
259
+ LABEL_1: 1
260
+ tokenizer_class:
261
+ desc: null
262
+ value: null
263
+ prefix:
264
+ desc: null
265
+ value: null
266
+ bos_token_id:
267
+ desc: null
268
+ value: 1
269
+ pad_token_id:
270
+ desc: null
271
+ value: 2
272
+ eos_token_id:
273
+ desc: null
274
+ value: 32000
275
+ sep_token_id:
276
+ desc: null
277
+ value: null
278
+ decoder_start_token_id:
279
+ desc: null
280
+ value: null
281
+ task_specific_params:
282
+ desc: null
283
+ value: null
284
+ problem_type:
285
+ desc: null
286
+ value: null
287
+ _name_or_path:
288
+ desc: null
289
+ value: Edentns/DataVortexS-10.7B-dpo-v1.11
290
+ transformers_version:
291
+ desc: null
292
+ value: 4.38.2
293
+ model_type:
294
+ desc: null
295
+ value: llama
296
+ quantization_config:
297
+ desc: null
298
+ value:
299
+ quant_method: QuantizationMethod.BITS_AND_BYTES
300
+ _load_in_8bit: false
301
+ _load_in_4bit: true
302
+ llm_int8_threshold: 6.0
303
+ llm_int8_skip_modules: null
304
+ llm_int8_enable_fp32_cpu_offload: false
305
+ llm_int8_has_fp16_weight: false
306
+ bnb_4bit_quant_type: nf4
307
+ bnb_4bit_use_double_quant: true
308
+ bnb_4bit_compute_dtype: float16
309
+ load_in_4bit: true
310
+ load_in_8bit: false
311
+ output_dir:
312
+ desc: null
313
+ value: /content/drive/MyDrive/best-one
314
+ overwrite_output_dir:
315
+ desc: null
316
+ value: false
317
+ do_train:
318
+ desc: null
319
+ value: false
320
+ do_eval:
321
+ desc: null
322
+ value: false
323
+ do_predict:
324
+ desc: null
325
+ value: false
326
+ evaluation_strategy:
327
+ desc: null
328
+ value: 'no'
329
+ prediction_loss_only:
330
+ desc: null
331
+ value: false
332
+ per_device_train_batch_size:
333
+ desc: null
334
+ value: 1
335
+ per_device_eval_batch_size:
336
+ desc: null
337
+ value: 8
338
+ per_gpu_train_batch_size:
339
+ desc: null
340
+ value: null
341
+ per_gpu_eval_batch_size:
342
+ desc: null
343
+ value: null
344
+ gradient_accumulation_steps:
345
+ desc: null
346
+ value: 1
347
+ eval_accumulation_steps:
348
+ desc: null
349
+ value: null
350
+ eval_delay:
351
+ desc: null
352
+ value: 0
353
+ learning_rate:
354
+ desc: null
355
+ value: 0.0002
356
+ weight_decay:
357
+ desc: null
358
+ value: 0.0
359
+ adam_beta1:
360
+ desc: null
361
+ value: 0.9
362
+ adam_beta2:
363
+ desc: null
364
+ value: 0.999
365
+ adam_epsilon:
366
+ desc: null
367
+ value: 1.0e-08
368
+ max_grad_norm:
369
+ desc: null
370
+ value: 0.3
371
+ num_train_epochs:
372
+ desc: null
373
+ value: 4
374
+ max_steps:
375
+ desc: null
376
+ value: -1
377
+ lr_scheduler_type:
378
+ desc: null
379
+ value: constant
380
+ lr_scheduler_kwargs:
381
+ desc: null
382
+ value: {}
383
+ warmup_ratio:
384
+ desc: null
385
+ value: 0.03
386
+ warmup_steps:
387
+ desc: null
388
+ value: 0
389
+ log_level:
390
+ desc: null
391
+ value: passive
392
+ log_level_replica:
393
+ desc: null
394
+ value: warning
395
+ log_on_each_node:
396
+ desc: null
397
+ value: true
398
+ logging_dir:
399
+ desc: null
400
+ value: /content/drive/MyDrive/best-one/runs/Mar10_06-48-48_17daf5749447
401
+ logging_strategy:
402
+ desc: null
403
+ value: steps
404
+ logging_first_step:
405
+ desc: null
406
+ value: false
407
+ logging_steps:
408
+ desc: null
409
+ value: 500
410
+ logging_nan_inf_filter:
411
+ desc: null
412
+ value: true
413
+ save_strategy:
414
+ desc: null
415
+ value: steps
416
+ save_steps:
417
+ desc: null
418
+ value: 100
419
+ save_total_limit:
420
+ desc: null
421
+ value: null
422
+ save_safetensors:
423
+ desc: null
424
+ value: true
425
+ save_on_each_node:
426
+ desc: null
427
+ value: false
428
+ save_only_model:
429
+ desc: null
430
+ value: false
431
+ no_cuda:
432
+ desc: null
433
+ value: false
434
+ use_cpu:
435
+ desc: null
436
+ value: false
437
+ use_mps_device:
438
+ desc: null
439
+ value: false
440
+ seed:
441
+ desc: null
442
+ value: 42
443
+ data_seed:
444
+ desc: null
445
+ value: null
446
+ jit_mode_eval:
447
+ desc: null
448
+ value: false
449
+ use_ipex:
450
+ desc: null
451
+ value: false
452
+ bf16:
453
+ desc: null
454
+ value: false
455
+ fp16:
456
+ desc: null
457
+ value: true
458
+ fp16_opt_level:
459
+ desc: null
460
+ value: O1
461
+ half_precision_backend:
462
+ desc: null
463
+ value: auto
464
+ bf16_full_eval:
465
+ desc: null
466
+ value: false
467
+ fp16_full_eval:
468
+ desc: null
469
+ value: false
470
+ tf32:
471
+ desc: null
472
+ value: null
473
+ local_rank:
474
+ desc: null
475
+ value: 0
476
+ ddp_backend:
477
+ desc: null
478
+ value: null
479
+ tpu_num_cores:
480
+ desc: null
481
+ value: null
482
+ tpu_metrics_debug:
483
+ desc: null
484
+ value: false
485
+ debug:
486
+ desc: null
487
+ value: []
488
+ dataloader_drop_last:
489
+ desc: null
490
+ value: false
491
+ eval_steps:
492
+ desc: null
493
+ value: null
494
+ dataloader_num_workers:
495
+ desc: null
496
+ value: 0
497
+ dataloader_prefetch_factor:
498
+ desc: null
499
+ value: null
500
+ past_index:
501
+ desc: null
502
+ value: -1
503
+ run_name:
504
+ desc: null
505
+ value: /content/drive/MyDrive/best-one
506
+ disable_tqdm:
507
+ desc: null
508
+ value: false
509
+ remove_unused_columns:
510
+ desc: null
511
+ value: true
512
+ label_names:
513
+ desc: null
514
+ value: null
515
+ load_best_model_at_end:
516
+ desc: null
517
+ value: false
518
+ metric_for_best_model:
519
+ desc: null
520
+ value: null
521
+ greater_is_better:
522
+ desc: null
523
+ value: null
524
+ ignore_data_skip:
525
+ desc: null
526
+ value: false
527
+ fsdp:
528
+ desc: null
529
+ value: []
530
+ fsdp_min_num_params:
531
+ desc: null
532
+ value: 0
533
+ fsdp_config:
534
+ desc: null
535
+ value:
536
+ min_num_params: 0
537
+ xla: false
538
+ xla_fsdp_v2: false
539
+ xla_fsdp_grad_ckpt: false
540
+ fsdp_transformer_layer_cls_to_wrap:
541
+ desc: null
542
+ value: null
543
+ accelerator_config:
544
+ desc: null
545
+ value:
546
+ split_batches: false
547
+ dispatch_batches: null
548
+ even_batches: true
549
+ use_seedable_sampler: true
550
+ deepspeed:
551
+ desc: null
552
+ value: null
553
+ label_smoothing_factor:
554
+ desc: null
555
+ value: 0.0
556
+ optim:
557
+ desc: null
558
+ value: adamw_torch_fused
559
+ optim_args:
560
+ desc: null
561
+ value: null
562
+ adafactor:
563
+ desc: null
564
+ value: false
565
+ group_by_length:
566
+ desc: null
567
+ value: false
568
+ length_column_name:
569
+ desc: null
570
+ value: length
571
+ report_to:
572
+ desc: null
573
+ value:
574
+ - tensorboard
575
+ - wandb
576
+ ddp_find_unused_parameters:
577
+ desc: null
578
+ value: null
579
+ ddp_bucket_cap_mb:
580
+ desc: null
581
+ value: null
582
+ ddp_broadcast_buffers:
583
+ desc: null
584
+ value: null
585
+ dataloader_pin_memory:
586
+ desc: null
587
+ value: true
588
+ dataloader_persistent_workers:
589
+ desc: null
590
+ value: false
591
+ skip_memory_metrics:
592
+ desc: null
593
+ value: true
594
+ use_legacy_prediction_loop:
595
+ desc: null
596
+ value: false
597
+ push_to_hub:
598
+ desc: null
599
+ value: true
600
+ resume_from_checkpoint:
601
+ desc: null
602
+ value: /content/drive/MyDrive/best-one/checkpoint-1000
603
+ hub_model_id:
604
+ desc: null
605
+ value: null
606
+ hub_strategy:
607
+ desc: null
608
+ value: every_save
609
+ hub_token:
610
+ desc: null
611
+ value: <HUB_TOKEN>
612
+ hub_private_repo:
613
+ desc: null
614
+ value: false
615
+ hub_always_push:
616
+ desc: null
617
+ value: false
618
+ gradient_checkpointing:
619
+ desc: null
620
+ value: true
621
+ gradient_checkpointing_kwargs:
622
+ desc: null
623
+ value: null
624
+ include_inputs_for_metrics:
625
+ desc: null
626
+ value: false
627
+ fp16_backend:
628
+ desc: null
629
+ value: auto
630
+ push_to_hub_model_id:
631
+ desc: null
632
+ value: null
633
+ push_to_hub_organization:
634
+ desc: null
635
+ value: null
636
+ push_to_hub_token:
637
+ desc: null
638
+ value: <PUSH_TO_HUB_TOKEN>
639
+ mp_parameters:
640
+ desc: null
641
+ value: ''
642
+ auto_find_batch_size:
643
+ desc: null
644
+ value: false
645
+ full_determinism:
646
+ desc: null
647
+ value: false
648
+ torchdynamo:
649
+ desc: null
650
+ value: null
651
+ ray_scope:
652
+ desc: null
653
+ value: last
654
+ ddp_timeout:
655
+ desc: null
656
+ value: 1800
657
+ torch_compile:
658
+ desc: null
659
+ value: false
660
+ torch_compile_backend:
661
+ desc: null
662
+ value: null
663
+ torch_compile_mode:
664
+ desc: null
665
+ value: null
666
+ dispatch_batches:
667
+ desc: null
668
+ value: null
669
+ split_batches:
670
+ desc: null
671
+ value: null
672
+ include_tokens_per_second:
673
+ desc: null
674
+ value: false
675
+ include_num_input_tokens_seen:
676
+ desc: null
677
+ value: false
678
+ neftune_noise_alpha:
679
+ desc: null
680
+ value: null
wandb/run-20240310_065024-lx2gw13k/files/output.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
2
+ /usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
3
+ warnings.warn(
4
+ /usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
5
+ warnings.warn(
6
+ /usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
wandb/run-20240310_065024-lx2gw13k/files/requirements.txt ADDED
@@ -0,0 +1,500 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Babel==2.14.0
2
+ CacheControl==0.14.0
3
+ Cython==3.0.9
4
+ Flask==2.2.5
5
+ GDAL==3.6.4
6
+ GitPython==3.1.42
7
+ Jinja2==3.1.3
8
+ Markdown==3.5.2
9
+ MarkupSafe==2.1.5
10
+ Pillow==9.4.0
11
+ PyDrive2==1.6.3
12
+ PyDrive==1.3.1
13
+ PyGObject==3.42.1
14
+ PyJWT==2.3.0
15
+ PyOpenGL==3.1.7
16
+ PySocks==1.7.1
17
+ PyWavelets==1.5.0
18
+ PyYAML==6.0.1
19
+ Pygments==2.16.1
20
+ SQLAlchemy==2.0.28
21
+ SecretStorage==3.3.1
22
+ Send2Trash==1.8.2
23
+ Sphinx==5.0.2
24
+ Werkzeug==3.0.1
25
+ absl-py==1.4.0
26
+ accelerate==0.27.2
27
+ aiohttp==3.9.3
28
+ aiosignal==1.3.1
29
+ alabaster==0.7.16
30
+ albumentations==1.3.1
31
+ altair==4.2.2
32
+ annotated-types==0.6.0
33
+ anyio==3.7.1
34
+ appdirs==1.4.4
35
+ argon2-cffi-bindings==21.2.0
36
+ argon2-cffi==23.1.0
37
+ array-record==0.5.0
38
+ arviz==0.15.1
39
+ astropy==5.3.4
40
+ astunparse==1.6.3
41
+ async-timeout==4.0.3
42
+ atpublic==4.0
43
+ attrs==23.2.0
44
+ audioread==3.0.1
45
+ autograd==1.6.2
46
+ backcall==0.2.0
47
+ beautifulsoup4==4.12.3
48
+ bidict==0.23.1
49
+ bigframes==0.22.0
50
+ bitsandbytes==0.43.0
51
+ bleach==6.1.0
52
+ blinker==1.4
53
+ blis==0.7.11
54
+ blosc2==2.0.0
55
+ bokeh==3.3.4
56
+ bqplot==0.12.43
57
+ branca==0.7.1
58
+ build==1.1.1
59
+ cachetools==5.3.3
60
+ catalogue==2.0.10
61
+ certifi==2024.2.2
62
+ cffi==1.16.0
63
+ chardet==5.2.0
64
+ charset-normalizer==3.3.2
65
+ chex==0.1.85
66
+ click-plugins==1.1.1
67
+ click==8.1.7
68
+ cligj==0.7.2
69
+ cloudpathlib==0.16.0
70
+ cloudpickle==2.2.1
71
+ cmake==3.27.9
72
+ cmdstanpy==1.2.1
73
+ colorcet==3.1.0
74
+ colorlover==0.3.0
75
+ colour==0.1.5
76
+ community==1.0.0b1
77
+ confection==0.1.4
78
+ cons==0.4.6
79
+ contextlib2==21.6.0
80
+ contourpy==1.2.0
81
+ cryptography==42.0.5
82
+ cufflinks==0.17.3
83
+ cupy-cuda12x==12.2.0
84
+ cvxopt==1.3.2
85
+ cvxpy==1.3.3
86
+ cycler==0.12.1
87
+ cymem==2.0.8
88
+ dask==2023.8.1
89
+ datascience==0.17.6
90
+ datasets==2.18.0
91
+ db-dtypes==1.2.0
92
+ dbus-python==1.2.18
93
+ debugpy==1.6.6
94
+ decorator==4.4.2
95
+ defusedxml==0.7.1
96
+ dill==0.3.8
97
+ distributed==2023.8.1
98
+ distro==1.7.0
99
+ dlib==19.24.2
100
+ dm-tree==0.1.8
101
+ docker-pycreds==0.4.0
102
+ docstring-parser==0.15
103
+ docutils==0.18.1
104
+ dopamine-rl==4.0.6
105
+ duckdb==0.9.2
106
+ earthengine-api==0.1.392
107
+ easydict==1.13
108
+ ecos==2.0.13
109
+ editdistance==0.6.2
110
+ eerepr==0.0.4
111
+ en-core-web-sm==3.7.1
112
+ entrypoints==0.4
113
+ et-xmlfile==1.1.0
114
+ etils==1.7.0
115
+ etuples==0.3.9
116
+ exceptiongroup==1.2.0
117
+ fastai==2.7.14
118
+ fastcore==1.5.29
119
+ fastdownload==0.0.7
120
+ fastjsonschema==2.19.1
121
+ fastprogress==1.0.3
122
+ fastrlock==0.8.2
123
+ filelock==3.13.1
124
+ fiona==1.9.5
125
+ firebase-admin==5.3.0
126
+ flatbuffers==23.5.26
127
+ flax==0.8.1
128
+ folium==0.14.0
129
+ fonttools==4.49.0
130
+ frozendict==2.4.0
131
+ frozenlist==1.4.1
132
+ fsspec==2023.6.0
133
+ future==0.18.3
134
+ gast==0.5.4
135
+ gcsfs==2023.6.0
136
+ gdown==4.7.3
137
+ geemap==0.32.0
138
+ gensim==4.3.2
139
+ geocoder==1.38.1
140
+ geographiclib==2.0
141
+ geopandas==0.13.2
142
+ geopy==2.3.0
143
+ gin-config==0.5.0
144
+ gitdb==4.0.11
145
+ glob2==0.7
146
+ google-ai-generativelanguage==0.4.0
147
+ google-api-core==2.11.1
148
+ google-api-python-client==2.84.0
149
+ google-auth-httplib2==0.1.1
150
+ google-auth-oauthlib==1.2.0
151
+ google-auth==2.27.0
152
+ google-cloud-aiplatform==1.43.0
153
+ google-cloud-bigquery-connection==1.12.1
154
+ google-cloud-bigquery-storage==2.24.0
155
+ google-cloud-bigquery==3.12.0
156
+ google-cloud-core==2.3.3
157
+ google-cloud-datastore==2.15.2
158
+ google-cloud-firestore==2.11.1
159
+ google-cloud-functions==1.13.3
160
+ google-cloud-iam==2.14.3
161
+ google-cloud-language==2.13.3
162
+ google-cloud-resource-manager==1.12.3
163
+ google-cloud-storage==2.8.0
164
+ google-cloud-translate==3.11.3
165
+ google-colab==1.0.0
166
+ google-crc32c==1.5.0
167
+ google-generativeai==0.3.2
168
+ google-pasta==0.2.0
169
+ google-resumable-media==2.7.0
170
+ google==2.0.3
171
+ googleapis-common-protos==1.62.0
172
+ googledrivedownloader==0.4
173
+ graphviz==0.20.1
174
+ greenlet==3.0.3
175
+ grpc-google-iam-v1==0.13.0
176
+ grpcio-status==1.48.2
177
+ grpcio==1.62.0
178
+ gspread-dataframe==3.3.1
179
+ gspread==3.4.2
180
+ gym-notices==0.0.8
181
+ gym==0.25.2
182
+ h5netcdf==1.3.0
183
+ h5py==3.9.0
184
+ holidays==0.44
185
+ holoviews==1.17.1
186
+ html5lib==1.1
187
+ httpimport==1.3.1
188
+ httplib2==0.22.0
189
+ huggingface-hub==0.20.3
190
+ humanize==4.7.0
191
+ hyperopt==0.2.7
192
+ ibis-framework==7.1.0
193
+ idna==3.6
194
+ imageio-ffmpeg==0.4.9
195
+ imageio==2.31.6
196
+ imagesize==1.4.1
197
+ imbalanced-learn==0.10.1
198
+ imgaug==0.4.0
199
+ importlib-metadata==7.0.1
200
+ importlib_resources==6.1.2
201
+ imutils==0.5.4
202
+ inflect==7.0.0
203
+ iniconfig==2.0.0
204
+ intel-openmp==2023.2.3
205
+ ipyevents==2.0.2
206
+ ipyfilechooser==0.6.0
207
+ ipykernel==5.5.6
208
+ ipyleaflet==0.18.2
209
+ ipython-genutils==0.2.0
210
+ ipython-sql==0.5.0
211
+ ipython==7.34.0
212
+ ipytree==0.2.2
213
+ ipywidgets==7.7.1
214
+ itsdangerous==2.1.2
215
+ jax==0.4.23
216
+ jaxlib==0.4.23+cuda12.cudnn89
217
+ jeepney==0.7.1
218
+ jieba==0.42.1
219
+ joblib==1.3.2
220
+ jsonpickle==3.0.3
221
+ jsonschema-specifications==2023.12.1
222
+ jsonschema==4.19.2
223
+ jupyter-client==6.1.12
224
+ jupyter-console==6.1.0
225
+ jupyter-server==1.24.0
226
+ jupyter_core==5.7.1
227
+ jupyterlab_pygments==0.3.0
228
+ jupyterlab_widgets==3.0.10
229
+ kaggle==1.5.16
230
+ kagglehub==0.2.0
231
+ keras==2.15.0
232
+ keyring==23.5.0
233
+ kiwisolver==1.4.5
234
+ langcodes==3.3.0
235
+ launchpadlib==1.10.16
236
+ lazr.restfulclient==0.14.4
237
+ lazr.uri==1.0.6
238
+ lazy_loader==0.3
239
+ libclang==16.0.6
240
+ librosa==0.10.1
241
+ lightgbm==4.1.0
242
+ linkify-it-py==2.0.3
243
+ llvmlite==0.41.1
244
+ locket==1.0.0
245
+ logical-unification==0.4.6
246
+ lxml==4.9.4
247
+ malloy==2023.1067
248
+ markdown-it-py==3.0.0
249
+ matplotlib-inline==0.1.6
250
+ matplotlib-venn==0.11.10
251
+ matplotlib==3.7.1
252
+ mdit-py-plugins==0.4.0
253
+ mdurl==0.1.2
254
+ miniKanren==1.0.3
255
+ missingno==0.5.2
256
+ mistune==0.8.4
257
+ mizani==0.9.3
258
+ mkl==2023.2.0
259
+ ml-dtypes==0.2.0
260
+ mlxtend==0.22.0
261
+ more-itertools==10.1.0
262
+ moviepy==1.0.3
263
+ mpmath==1.3.0
264
+ msgpack==1.0.8
265
+ multidict==6.0.5
266
+ multipledispatch==1.0.0
267
+ multiprocess==0.70.16
268
+ multitasking==0.0.11
269
+ murmurhash==1.0.10
270
+ music21==9.1.0
271
+ natsort==8.4.0
272
+ nbclassic==1.0.0
273
+ nbclient==0.9.0
274
+ nbconvert==6.5.4
275
+ nbformat==5.9.2
276
+ nest-asyncio==1.6.0
277
+ networkx==3.2.1
278
+ nibabel==4.0.2
279
+ nltk==3.8.1
280
+ notebook==6.5.5
281
+ notebook_shim==0.2.4
282
+ numba==0.58.1
283
+ numexpr==2.9.0
284
+ numpy==1.25.2
285
+ oauth2client==4.1.3
286
+ oauthlib==3.2.2
287
+ opencv-contrib-python==4.8.0.76
288
+ opencv-python-headless==4.9.0.80
289
+ opencv-python==4.8.0.76
290
+ openpyxl==3.1.2
291
+ opt-einsum==3.3.0
292
+ optax==0.1.9
293
+ orbax-checkpoint==0.4.4
294
+ osqp==0.6.2.post8
295
+ packaging==23.2
296
+ pandas-datareader==0.10.0
297
+ pandas-gbq==0.19.2
298
+ pandas-stubs==1.5.3.230304
299
+ pandas==2.2.1
300
+ pandocfilters==1.5.1
301
+ panel==1.3.8
302
+ param==2.0.2
303
+ parso==0.8.3
304
+ parsy==2.1
305
+ partd==1.4.1
306
+ pathlib==1.0.1
307
+ patsy==0.5.6
308
+ peewee==3.17.1
309
+ peft==0.9.0
310
+ pexpect==4.9.0
311
+ pickleshare==0.7.5
312
+ pins==0.8.4
313
+ pip-tools==6.13.0
314
+ pip==23.1.2
315
+ platformdirs==4.2.0
316
+ plotly==5.15.0
317
+ plotnine==0.12.4
318
+ pluggy==1.4.0
319
+ polars==0.20.2
320
+ pooch==1.8.1
321
+ portpicker==1.5.2
322
+ prefetch-generator==1.0.3
323
+ preshed==3.0.9
324
+ prettytable==3.10.0
325
+ proglog==0.1.10
326
+ progressbar2==4.2.0
327
+ prometheus_client==0.20.0
328
+ promise==2.3
329
+ prompt-toolkit==3.0.43
330
+ prophet==1.1.5
331
+ proto-plus==1.23.0
332
+ protobuf==3.20.3
333
+ psutil==5.9.5
334
+ psycopg2==2.9.9
335
+ ptyprocess==0.7.0
336
+ py-cpuinfo==9.0.0
337
+ py4j==0.10.9.7
338
+ pyOpenSSL==24.0.0
339
+ pyarrow-hotfix==0.6
340
+ pyarrow==14.0.2
341
+ pyasn1-modules==0.3.0
342
+ pyasn1==0.5.1
343
+ pycocotools==2.0.7
344
+ pycparser==2.21
345
+ pydantic==2.6.3
346
+ pydantic_core==2.16.3
347
+ pydata-google-auth==1.8.2
348
+ pydot-ng==2.0.0
349
+ pydot==1.4.2
350
+ pydotplus==2.0.2
351
+ pyerfa==2.0.1.1
352
+ pygame==2.5.2
353
+ pymc==5.10.4
354
+ pymystem3==0.2.0
355
+ pyparsing==3.1.1
356
+ pyperclip==1.8.2
357
+ pyproj==3.6.1
358
+ pyproject_hooks==1.0.0
359
+ pyshp==2.3.1
360
+ pytensor==2.18.6
361
+ pytest==7.4.4
362
+ python-apt==0.0.0
363
+ python-box==7.1.1
364
+ python-dateutil==2.8.2
365
+ python-louvain==0.16
366
+ python-slugify==8.0.4
367
+ python-utils==3.8.2
368
+ pytz==2023.4
369
+ pyviz_comms==3.0.1
370
+ pyzmq==23.2.1
371
+ qdldl==0.1.7.post0
372
+ qudida==0.0.4
373
+ ratelim==0.1.6
374
+ referencing==0.33.0
375
+ regex==2023.12.25
376
+ requests-oauthlib==1.3.1
377
+ requests==2.31.0
378
+ requirements-parser==0.5.0
379
+ rich==13.7.1
380
+ rpds-py==0.18.0
381
+ rpy2==3.4.2
382
+ rsa==4.9
383
+ safetensors==0.4.2
384
+ scikit-image==0.19.3
385
+ scikit-learn==1.2.2
386
+ scipy==1.11.4
387
+ scooby==0.9.2
388
+ scs==3.2.4.post1
389
+ seaborn==0.13.1
390
+ sentence-transformers==2.5.1
391
+ sentencepiece==0.1.99
392
+ sentry-sdk==1.41.0
393
+ setproctitle==1.3.3
394
+ setuptools==67.7.2
395
+ shapely==2.0.3
396
+ shtab==1.7.1
397
+ six==1.16.0
398
+ six==1.16.0
399
+ sklearn-pandas==2.2.0
400
+ smart-open==6.4.0
401
+ smmap==5.0.1
402
+ sniffio==1.3.1
403
+ snowballstemmer==2.2.0
404
+ sortedcontainers==2.4.0
405
+ soundfile==0.12.1
406
+ soupsieve==2.5
407
+ soxr==0.3.7
408
+ spacy-legacy==3.0.12
409
+ spacy-loggers==1.0.5
410
+ spacy==3.7.4
411
+ sphinxcontrib-applehelp==1.0.8
412
+ sphinxcontrib-devhelp==1.0.6
413
+ sphinxcontrib-htmlhelp==2.0.5
414
+ sphinxcontrib-jsmath==1.0.1
415
+ sphinxcontrib-qthelp==1.0.7
416
+ sphinxcontrib-serializinghtml==1.1.10
417
+ sqlglot==19.9.0
418
+ sqlparse==0.4.4
419
+ srsly==2.4.8
420
+ stanio==0.3.0
421
+ statsmodels==0.14.1
422
+ sympy==1.12
423
+ tables==3.8.0
424
+ tabulate==0.9.0
425
+ tbb==2021.11.0
426
+ tblib==3.0.0
427
+ tenacity==8.2.3
428
+ tensorboard-data-server==0.7.2
429
+ tensorboard==2.15.2
430
+ tensorflow-datasets==4.9.4
431
+ tensorflow-estimator==2.15.0
432
+ tensorflow-gcs-config==2.15.0
433
+ tensorflow-hub==0.16.1
434
+ tensorflow-io-gcs-filesystem==0.36.0
435
+ tensorflow-metadata==1.14.0
436
+ tensorflow-probability==0.23.0
437
+ tensorflow==2.15.0
438
+ tensorstore==0.1.45
439
+ termcolor==2.4.0
440
+ terminado==0.18.0
441
+ text-unidecode==1.3
442
+ textblob==0.17.1
443
+ tf-keras==2.15.0
444
+ tf-slim==1.1.0
445
+ thinc==8.2.3
446
+ threadpoolctl==3.3.0
447
+ tifffile==2024.2.12
448
+ tinycss2==1.2.1
449
+ tokenizers==0.15.2
450
+ toml==0.10.2
451
+ tomli==2.0.1
452
+ toolz==0.12.1
453
+ torch==2.1.0+cu121
454
+ torchaudio==2.1.0+cu121
455
+ torchdata==0.7.0
456
+ torchsummary==1.5.1
457
+ torchtext==0.16.0
458
+ torchvision==0.16.0+cu121
459
+ tornado==6.3.3
460
+ tqdm==4.66.2
461
+ traitlets==5.7.1
462
+ traittypes==0.2.1
463
+ transformers==4.38.2
464
+ triton==2.1.0
465
+ trl==0.7.11
466
+ tweepy==4.14.0
467
+ typer==0.9.0
468
+ types-pytz==2024.1.0.20240203
469
+ types-setuptools==69.1.0.20240302
470
+ typing_extensions==4.10.0
471
+ tyro==0.7.3
472
+ tzdata==2024.1
473
+ tzlocal==5.2
474
+ uc-micro-py==1.0.3
475
+ uritemplate==4.1.1
476
+ urllib3==2.0.7
477
+ vega-datasets==0.9.0
478
+ wadllib==1.3.6
479
+ wandb==0.16.4
480
+ wasabi==1.1.2
481
+ wcwidth==0.2.13
482
+ weasel==0.3.4
483
+ webcolors==1.13
484
+ webencodings==0.5.1
485
+ websocket-client==1.7.0
486
+ wheel==0.42.0
487
+ widgetsnbextension==3.6.6
488
+ wordcloud==1.9.3
489
+ wrapt==1.14.1
490
+ xarray-einstats==0.7.0
491
+ xarray==2023.7.0
492
+ xgboost==2.0.3
493
+ xlrd==2.0.1
494
+ xxhash==3.4.1
495
+ xyzservices==2023.10.1
496
+ yarl==1.9.4
497
+ yellowbrick==1.5
498
+ yfinance==0.2.37
499
+ zict==3.0.0
500
+ zipp==3.17.0
wandb/run-20240310_065024-lx2gw13k/files/wandb-metadata.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.1.58+-x86_64-with-glibc2.35",
3
+ "python": "3.10.12",
4
+ "heartbeatAt": "2024-03-10T06:50:27.883167",
5
+ "startedAt": "2024-03-10T06:50:24.686000",
6
+ "docker": null,
7
+ "cuda": null,
8
+ "args": [],
9
+ "state": "running",
10
+ "program": "Final-Solar-train-QLoRA.ipynb",
11
+ "codePathLocal": null,
12
+ "colab": "https://colab.research.google.com/notebook#fileId=1cntH6JMHtnqGybNA0Y55Jk1U_HRTWn3M",
13
+ "host": "17daf5749447",
14
+ "username": "root",
15
+ "executable": "/usr/bin/python3",
16
+ "cpu_count": 1,
17
+ "cpu_count_logical": 2,
18
+ "cpu_freq": {
19
+ "current": 2000.202,
20
+ "min": 0.0,
21
+ "max": 0.0
22
+ },
23
+ "cpu_freq_per_core": [
24
+ {
25
+ "current": 2000.202,
26
+ "min": 0.0,
27
+ "max": 0.0
28
+ },
29
+ {
30
+ "current": 2000.202,
31
+ "min": 0.0,
32
+ "max": 0.0
33
+ }
34
+ ],
35
+ "disk": {
36
+ "/": {
37
+ "total": 78.1898422241211,
38
+ "used": 48.04489517211914
39
+ }
40
+ },
41
+ "gpu": "Tesla T4",
42
+ "gpu_count": 1,
43
+ "gpu_devices": [
44
+ {
45
+ "name": "Tesla T4",
46
+ "memory_total": 16106127360
47
+ }
48
+ ],
49
+ "memory": {
50
+ "total": 12.674789428710938
51
+ }
52
+ }
wandb/run-20240310_065024-lx2gw13k/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"train/loss": 0.6012, "train/grad_norm": 3.3899552822113037, "train/learning_rate": 0.0002, "train/epoch": 1.99, "train/global_step": 2000, "_timestamp": 1710061377.8150547, "_runtime": 7953.062962770462, "_step": 1}
wandb/run-20240310_065024-lx2gw13k/logs/debug-internal.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20240310_065024-lx2gw13k/logs/debug.log ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-03-10 06:50:24,735 INFO MainThread:929 [wandb_setup.py:_flush():76] Current SDK version is 0.16.4
2
+ 2024-03-10 06:50:24,736 INFO MainThread:929 [wandb_setup.py:_flush():76] Configure stats pid to 929
3
+ 2024-03-10 06:50:24,737 INFO MainThread:929 [wandb_setup.py:_flush():76] Loading settings from /root/.config/wandb/settings
4
+ 2024-03-10 06:50:24,737 INFO MainThread:929 [wandb_setup.py:_flush():76] Loading settings from /content/drive/.shortcut-targets-by-id/1VA6x7g-jYQKnbJblLJmHQJesD5-S3Ury/best-one/wandb/settings
5
+ 2024-03-10 06:50:24,737 INFO MainThread:929 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
6
+ 2024-03-10 06:50:24,737 INFO MainThread:929 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
+ 2024-03-10 06:50:24,738 INFO MainThread:929 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program': '<python with no main file>'}
8
+ 2024-03-10 06:50:24,738 INFO MainThread:929 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
9
+ 2024-03-10 06:50:24,739 INFO MainThread:929 [wandb_init.py:_log_setup():526] Logging user logs to /content/drive/.shortcut-targets-by-id/1VA6x7g-jYQKnbJblLJmHQJesD5-S3Ury/best-one/wandb/run-20240310_065024-lx2gw13k/logs/debug.log
10
+ 2024-03-10 06:50:24,740 INFO MainThread:929 [wandb_init.py:_log_setup():527] Logging internal logs to /content/drive/.shortcut-targets-by-id/1VA6x7g-jYQKnbJblLJmHQJesD5-S3Ury/best-one/wandb/run-20240310_065024-lx2gw13k/logs/debug-internal.log
11
+ 2024-03-10 06:50:24,740 INFO MainThread:929 [wandb_init.py:_jupyter_setup():472] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x7aa380ba0220>
12
+ 2024-03-10 06:50:24,741 INFO MainThread:929 [wandb_init.py:init():566] calling init triggers
13
+ 2024-03-10 06:50:24,741 INFO MainThread:929 [wandb_init.py:init():573] wandb.init called with sweep_config: {}
14
+ config: {}
15
+ 2024-03-10 06:50:24,741 INFO MainThread:929 [wandb_init.py:init():616] starting backend
16
+ 2024-03-10 06:50:24,742 INFO MainThread:929 [wandb_init.py:init():620] setting up manager
17
+ 2024-03-10 06:50:24,749 INFO MainThread:929 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
18
+ 2024-03-10 06:50:24,751 INFO MainThread:929 [wandb_init.py:init():628] backend started and connected
19
+ 2024-03-10 06:50:24,833 INFO MainThread:929 [wandb_run.py:_label_probe_notebook():1295] probe notebook
20
+ 2024-03-10 06:50:27,302 INFO MainThread:929 [wandb_init.py:init():720] updated telemetry
21
+ 2024-03-10 06:50:27,312 INFO MainThread:929 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout
22
+ 2024-03-10 06:50:27,741 INFO MainThread:929 [wandb_run.py:_on_init():2262] communicating current version
23
+ 2024-03-10 06:50:27,844 INFO MainThread:929 [wandb_run.py:_on_init():2271] got version response
24
+ 2024-03-10 06:50:27,844 INFO MainThread:929 [wandb_init.py:init():804] starting run threads in backend
25
+ 2024-03-10 06:50:28,339 INFO MainThread:929 [wandb_run.py:_console_start():2241] atexit reg
26
+ 2024-03-10 06:50:28,339 INFO MainThread:929 [wandb_run.py:_redirect():2096] redirect: wrap_raw
27
+ 2024-03-10 06:50:28,340 INFO MainThread:929 [wandb_run.py:_redirect():2161] Wrapping output streams.
28
+ 2024-03-10 06:50:28,340 INFO MainThread:929 [wandb_run.py:_redirect():2186] Redirects installed.
29
+ 2024-03-10 06:50:28,343 INFO MainThread:929 [wandb_init.py:init():847] run started, returning control to user process
30
+ 2024-03-10 06:50:28,351 INFO MainThread:929 [wandb_run.py:_config_callback():1343] config_cb None None {'vocab_size': 48000, 'max_position_embeddings': 4096, 'hidden_size': 4096, 'intermediate_size': 14336, 'num_hidden_layers': 48, 'num_attention_heads': 32, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': True, 'rope_theta': 10000.0, 'rope_scaling': None, 'attention_bias': False, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 2, 'eos_token_id': 32000, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'Edentns/DataVortexS-10.7B-dpo-v1.11', 'transformers_version': '4.38.2', 'model_type': 'llama', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'bnb_4bit_compute_dtype': 'float16', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/content/drive/MyDrive/best-one', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'constant', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/content/drive/MyDrive/best-one/runs/Mar10_06-48-48_17daf5749447', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/content/drive/MyDrive/best-one', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch_fused', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': '/content/drive/MyDrive/best-one/checkpoint-1000', 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
wandb/run-20240310_065024-lx2gw13k/run-lx2gw13k.wandb ADDED
Binary file (164 kB). View file