kertob commited on
Commit
f3b5a3d
1 Parent(s): 9ffaa91

Training in progress, step 10

Browse files
adapter_config.json CHANGED
@@ -19,10 +19,10 @@
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
- "query_key_value",
23
  "dense_h_to_4h",
24
  "dense_4h_to_h",
25
- "dense"
26
  ],
27
  "task_type": "CAUSAL_LM",
28
  "use_rslora": false
 
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
+ "dense",
23
  "dense_h_to_4h",
24
  "dense_4h_to_h",
25
+ "query_key_value"
26
  ],
27
  "task_type": "CAUSAL_LM",
28
  "use_rslora": false
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c64d2c2ebef88764f7ab7aca8520fb32afeec469a809ed0b3699fab4eb532e83
3
  size 261131840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:836e02fe12c9a7a7d72cf50137dd461895add8fbe0992082d55a49076f613f23
3
  size 261131840
runs/Feb14_12-54-25_f4b889916ade/events.out.tfevents.1707915266.f4b889916ade.34240.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d328d59d18eea5bf1745e4cffdaac3b9ee792f5a6141799fcd6710d072080f48
3
+ size 5620
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3ad917fee89e2d2ef8743a8c2e5730c727035657cfd0953306633c6dafc920e
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:522c12ea72da11a88382314295653dd1a1e797b20b477a13fb7ec0b4ab301b9e
3
  size 4664
wandb/debug-cli.root.log ADDED
File without changes
wandb/debug-internal.log CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/debug.log CHANGED
@@ -1,43 +1,29 @@
1
- 2024-02-14 11:54:05,514 INFO MainThread:4105 [wandb_setup.py:_flush():76] Current SDK version is 0.16.3
2
- 2024-02-14 11:54:05,514 INFO MainThread:4105 [wandb_setup.py:_flush():76] Configure stats pid to 4105
3
- 2024-02-14 11:54:05,514 INFO MainThread:4105 [wandb_setup.py:_flush():76] Loading settings from /root/.config/wandb/settings
4
- 2024-02-14 11:54:05,514 INFO MainThread:4105 [wandb_setup.py:_flush():76] Loading settings from /content/wandb/settings
5
- 2024-02-14 11:54:05,514 INFO MainThread:4105 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
6
- 2024-02-14 11:54:05,514 INFO MainThread:4105 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
- 2024-02-14 11:54:05,514 INFO MainThread:4105 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program': '<python with no main file>'}
8
- 2024-02-14 11:54:05,514 INFO MainThread:4105 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
9
- 2024-02-14 11:54:05,514 INFO MainThread:4105 [wandb_init.py:_log_setup():526] Logging user logs to /content/wandb/run-20240214_115405-518dheci/logs/debug.log
10
- 2024-02-14 11:54:05,514 INFO MainThread:4105 [wandb_init.py:_log_setup():527] Logging internal logs to /content/wandb/run-20240214_115405-518dheci/logs/debug-internal.log
11
- 2024-02-14 11:54:05,515 INFO MainThread:4105 [wandb_init.py:_jupyter_setup():472] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x7933b2d894b0>
12
- 2024-02-14 11:54:05,515 INFO MainThread:4105 [wandb_init.py:init():566] calling init triggers
13
- 2024-02-14 11:54:05,515 INFO MainThread:4105 [wandb_init.py:init():573] wandb.init called with sweep_config: {}
14
  config: {}
15
- 2024-02-14 11:54:05,515 INFO MainThread:4105 [wandb_init.py:init():616] starting backend
16
- 2024-02-14 11:54:05,515 INFO MainThread:4105 [wandb_init.py:init():620] setting up manager
17
- 2024-02-14 11:54:05,517 INFO MainThread:4105 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
18
- 2024-02-14 11:54:05,518 INFO MainThread:4105 [wandb_init.py:init():628] backend started and connected
19
- 2024-02-14 11:54:05,528 INFO MainThread:4105 [wandb_run.py:_label_probe_notebook():1295] probe notebook
20
- 2024-02-14 11:54:08,528 INFO MainThread:4105 [wandb_init.py:init():720] updated telemetry
21
- 2024-02-14 11:54:08,531 INFO MainThread:4105 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout
22
- 2024-02-14 11:54:08,756 INFO MainThread:4105 [wandb_run.py:_on_init():2262] communicating current version
23
- 2024-02-14 11:54:08,885 INFO MainThread:4105 [wandb_run.py:_on_init():2271] got version response
24
- 2024-02-14 11:54:08,885 INFO MainThread:4105 [wandb_init.py:init():804] starting run threads in backend
25
- 2024-02-14 11:54:08,936 INFO MainThread:4105 [wandb_run.py:_console_start():2241] atexit reg
26
- 2024-02-14 11:54:08,936 INFO MainThread:4105 [wandb_run.py:_redirect():2096] redirect: wrap_raw
27
- 2024-02-14 11:54:08,936 INFO MainThread:4105 [wandb_run.py:_redirect():2161] Wrapping output streams.
28
- 2024-02-14 11:54:08,936 INFO MainThread:4105 [wandb_run.py:_redirect():2186] Redirects installed.
29
- 2024-02-14 11:54:08,937 INFO MainThread:4105 [wandb_init.py:init():847] run started, returning control to user process
30
- 2024-02-14 11:54:08,940 INFO MainThread:4105 [wandb_run.py:_config_callback():1343] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'num_hidden_layers': 32, 'num_attention_heads': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'num_kv_heads': 71, 'alibi': False, 'new_decoder_architecture': False, 'multi_query': True, 'parallel_attn': True, 'bias': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['FalconForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'vilsonrodrigues/falcon-7b-instruct-sharded', 'transformers_version': '4.37.2', 'apply_residual_connection_post_layernorm': False, 'auto_map': {'AutoConfig': 'vilsonrodrigues/falcon-7b-instruct-sharded--configuration_falcon.FalconConfig', 'AutoModel': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconModel', 'AutoModelForSequenceClassification': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForSequenceClassification', 'AutoModelForTokenClassification': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForTokenClassification', 'AutoModelForQuestionAnswering': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForQuestionAnswering', 'AutoModelForCausalLM': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForCausalLM'}, 'model_type': 'falcon', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', 'load_in_8bit': False, 'load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'bnb_4bit_compute_dtype': 'float16'}, 'output_dir': './', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 3.0, 'max_steps': 180, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb14_11-53-30_f4b889916ade', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 10, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': None, 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
31
- 2024-02-14 12:22:52,244 INFO MainThread:4105 [jupyter.py:save_ipynb():373] not saving jupyter notebook
32
- 2024-02-14 12:22:52,244 INFO MainThread:4105 [wandb_init.py:_pause_backend():437] pausing backend
33
- 2024-02-14 12:49:31,960 INFO MainThread:4105 [wandb_init.py:_resume_backend():442] resuming backend
34
- 2024-02-14 12:49:31,964 INFO MainThread:4105 [jupyter.py:save_ipynb():373] not saving jupyter notebook
35
- 2024-02-14 12:49:31,964 INFO MainThread:4105 [wandb_init.py:_pause_backend():437] pausing backend
36
- 2024-02-14 12:49:34,168 INFO MainThread:4105 [wandb_init.py:_resume_backend():442] resuming backend
37
- 2024-02-14 12:49:34,432 INFO MainThread:4105 [jupyter.py:save_ipynb():373] not saving jupyter notebook
38
- 2024-02-14 12:49:34,433 INFO MainThread:4105 [wandb_init.py:_pause_backend():437] pausing backend
39
- 2024-02-14 12:49:36,951 INFO MainThread:4105 [wandb_init.py:_resume_backend():442] resuming backend
40
- 2024-02-14 12:49:36,958 INFO MainThread:4105 [jupyter.py:save_ipynb():373] not saving jupyter notebook
41
- 2024-02-14 12:49:36,958 INFO MainThread:4105 [wandb_init.py:_pause_backend():437] pausing backend
42
- 2024-02-14 12:49:38,538 INFO MainThread:4105 [wandb_init.py:_resume_backend():442] resuming backend
43
- 2024-02-14 12:49:39,001 INFO MainThread:4105 [wandb_run.py:_config_callback():1343] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'num_hidden_layers': 32, 'num_attention_heads': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'num_kv_heads': 71, 'alibi': False, 'new_decoder_architecture': False, 'multi_query': True, 'parallel_attn': True, 'bias': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['FalconForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'vilsonrodrigues/falcon-7b-instruct-sharded', 'transformers_version': '4.37.2', 'apply_residual_connection_post_layernorm': False, 'auto_map': {'AutoConfig': 'vilsonrodrigues/falcon-7b-instruct-sharded--configuration_falcon.FalconConfig', 'AutoModel': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconModel', 'AutoModelForSequenceClassification': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForSequenceClassification', 'AutoModelForTokenClassification': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForTokenClassification', 'AutoModelForQuestionAnswering': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForQuestionAnswering', 'AutoModelForCausalLM': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForCausalLM'}, 'model_type': 'falcon', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', 'load_in_8bit': False, 'load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'bnb_4bit_compute_dtype': 'float16'}, 'output_dir': './', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 3.0, 'max_steps': 250, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb14_12-49-31_f4b889916ade', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 10, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': None, 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
 
1
+ 2024-02-14 12:54:29,375 INFO MainThread:34240 [wandb_setup.py:_flush():76] Current SDK version is 0.16.3
2
+ 2024-02-14 12:54:29,375 INFO MainThread:34240 [wandb_setup.py:_flush():76] Configure stats pid to 34240
3
+ 2024-02-14 12:54:29,375 INFO MainThread:34240 [wandb_setup.py:_flush():76] Loading settings from /root/.config/wandb/settings
4
+ 2024-02-14 12:54:29,375 INFO MainThread:34240 [wandb_setup.py:_flush():76] Loading settings from /content/wandb/settings
5
+ 2024-02-14 12:54:29,375 INFO MainThread:34240 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
6
+ 2024-02-14 12:54:29,375 INFO MainThread:34240 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
+ 2024-02-14 12:54:29,375 INFO MainThread:34240 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program': '<python with no main file>'}
8
+ 2024-02-14 12:54:29,375 INFO MainThread:34240 [wandb_init.py:_log_setup():526] Logging user logs to /content/wandb/run-20240214_125429-fe3s7gan/logs/debug.log
9
+ 2024-02-14 12:54:29,375 INFO MainThread:34240 [wandb_init.py:_log_setup():527] Logging internal logs to /content/wandb/run-20240214_125429-fe3s7gan/logs/debug-internal.log
10
+ 2024-02-14 12:54:29,376 INFO MainThread:34240 [wandb_init.py:_jupyter_setup():472] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x7db6b2e92350>
11
+ 2024-02-14 12:54:29,376 INFO MainThread:34240 [wandb_init.py:init():566] calling init triggers
12
+ 2024-02-14 12:54:29,376 INFO MainThread:34240 [wandb_init.py:init():573] wandb.init called with sweep_config: {}
 
13
  config: {}
14
+ 2024-02-14 12:54:29,376 INFO MainThread:34240 [wandb_init.py:init():616] starting backend
15
+ 2024-02-14 12:54:29,376 INFO MainThread:34240 [wandb_init.py:init():620] setting up manager
16
+ 2024-02-14 12:54:29,378 INFO MainThread:34240 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2024-02-14 12:54:29,380 INFO MainThread:34240 [wandb_init.py:init():628] backend started and connected
18
+ 2024-02-14 12:54:29,388 INFO MainThread:34240 [wandb_run.py:_label_probe_notebook():1295] probe notebook
19
+ 2024-02-14 12:54:30,446 INFO MainThread:34240 [wandb_init.py:init():720] updated telemetry
20
+ 2024-02-14 12:54:30,449 INFO MainThread:34240 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout
21
+ 2024-02-14 12:54:30,681 INFO MainThread:34240 [wandb_run.py:_on_init():2262] communicating current version
22
+ 2024-02-14 12:54:30,810 INFO MainThread:34240 [wandb_run.py:_on_init():2271] got version response
23
+ 2024-02-14 12:54:30,810 INFO MainThread:34240 [wandb_init.py:init():804] starting run threads in backend
24
+ 2024-02-14 12:54:30,856 INFO MainThread:34240 [wandb_run.py:_console_start():2241] atexit reg
25
+ 2024-02-14 12:54:30,857 INFO MainThread:34240 [wandb_run.py:_redirect():2096] redirect: wrap_raw
26
+ 2024-02-14 12:54:30,857 INFO MainThread:34240 [wandb_run.py:_redirect():2161] Wrapping output streams.
27
+ 2024-02-14 12:54:30,857 INFO MainThread:34240 [wandb_run.py:_redirect():2186] Redirects installed.
28
+ 2024-02-14 12:54:30,858 INFO MainThread:34240 [wandb_init.py:init():847] run started, returning control to user process
29
+ 2024-02-14 12:54:30,862 INFO MainThread:34240 [wandb_run.py:_config_callback():1343] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'num_hidden_layers': 32, 'num_attention_heads': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'num_kv_heads': 71, 'alibi': False, 'new_decoder_architecture': False, 'multi_query': True, 'parallel_attn': True, 'bias': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['FalconForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'vilsonrodrigues/falcon-7b-instruct-sharded', 'transformers_version': '4.37.2', 'apply_residual_connection_post_layernorm': False, 'auto_map': {'AutoConfig': 'vilsonrodrigues/falcon-7b-instruct-sharded--configuration_falcon.FalconConfig', 'AutoModel': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconModel', 'AutoModelForSequenceClassification': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForSequenceClassification', 'AutoModelForTokenClassification': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForTokenClassification', 'AutoModelForQuestionAnswering': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForQuestionAnswering', 'AutoModelForCausalLM': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForCausalLM'}, 'model_type': 'falcon', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', 'load_in_8bit': False, 'load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'bnb_4bit_compute_dtype': 'float16'}, 'output_dir': './', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 3.0, 'max_steps': 250, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb14_12-54-25_f4b889916ade', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 10, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': None, 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20240214_115405-518dheci/files/output.log CHANGED
@@ -1 +1,2 @@
1
 
 
 
1
 
2
+ Checkpoint destination directory ./checkpoint-10 already exists and is non-empty.Saving will proceed but saved results may be invalid.
wandb/run-20240214_115405-518dheci/files/wandb-summary.json CHANGED
@@ -1 +1 @@
1
- {"train/loss": 0.1494, "train/learning_rate": 0.00019996629653035126, "train/epoch": 0.8, "train/global_step": 10, "_timestamp": 1707915057.2341144, "_runtime": 3411.7149634361267, "_step": 19, "train/train_runtime": 1744.0643, "train/train_samples_per_second": 1.651, "train/train_steps_per_second": 0.103, "train/total_flos": 1.1953245987458304e+16, "train/train_loss": 0.36850863695144653}
 
1
+ {"train/loss": 0.1494, "train/learning_rate": 0.00019996629653035126, "train/epoch": 0.8, "train/global_step": 10, "_timestamp": 1707915057.2341144, "_runtime": 3411.7149634361267, "_step": 19, "train/train_runtime": 1744.0643, "train/train_samples_per_second": 1.651, "train/train_steps_per_second": 0.103, "train/total_flos": 1.1953245987458304e+16, "train/train_loss": 0.36850863695144653, "_wandb": {"runtime": 1831}}
wandb/run-20240214_115405-518dheci/logs/debug-internal.log CHANGED
@@ -1646,3 +1646,149 @@
1646
  2024-02-14 12:51:09,112 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: internal_messages
1647
  2024-02-14 12:51:09,112 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: stop_status
1648
  2024-02-14 12:51:09,112 DEBUG SenderThread:19614 [sender.py:send_request():409] send_request: stop_status
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1646
  2024-02-14 12:51:09,112 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: internal_messages
1647
  2024-02-14 12:51:09,112 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: stop_status
1648
  2024-02-14 12:51:09,112 DEBUG SenderThread:19614 [sender.py:send_request():409] send_request: stop_status
1649
+ 2024-02-14 12:51:13,191 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: status_report
1650
+ 2024-02-14 12:51:18,192 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: status_report
1651
+ 2024-02-14 12:51:23,193 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: status_report
1652
+ 2024-02-14 12:51:24,111 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: internal_messages
1653
+ 2024-02-14 12:51:24,112 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: stop_status
1654
+ 2024-02-14 12:51:24,112 DEBUG SenderThread:19614 [sender.py:send_request():409] send_request: stop_status
1655
+ 2024-02-14 12:51:26,420 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: pause
1656
+ 2024-02-14 12:51:26,420 INFO HandlerThread:19614 [handler.py:handle_request_pause():708] stopping system metrics thread
1657
+ 2024-02-14 12:51:26,421 INFO HandlerThread:19614 [system_monitor.py:finish():203] Stopping system monitor
1658
+ 2024-02-14 12:51:26,421 DEBUG SystemMonitor:19614 [system_monitor.py:_start():179] Finished system metrics aggregation loop
1659
+ 2024-02-14 12:51:26,421 DEBUG SystemMonitor:19614 [system_monitor.py:_start():183] Publishing last batch of metrics
1660
+ 2024-02-14 12:51:26,422 INFO HandlerThread:19614 [interfaces.py:finish():202] Joined cpu monitor
1661
+ 2024-02-14 12:51:26,422 INFO HandlerThread:19614 [interfaces.py:finish():202] Joined disk monitor
1662
+ 2024-02-14 12:51:26,429 INFO HandlerThread:19614 [interfaces.py:finish():202] Joined gpu monitor
1663
+ 2024-02-14 12:51:26,429 INFO HandlerThread:19614 [interfaces.py:finish():202] Joined memory monitor
1664
+ 2024-02-14 12:51:26,429 INFO HandlerThread:19614 [interfaces.py:finish():202] Joined network monitor
1665
+ 2024-02-14 12:51:26,430 DEBUG SenderThread:19614 [sender.py:send():382] send: stats
1666
+ 2024-02-14 12:51:28,430 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: status_report
1667
+ 2024-02-14 12:51:33,431 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: status_report
1668
+ 2024-02-14 12:51:34,103 DEBUG SenderThread:19614 [sender.py:send():382] send: exit
1669
+ 2024-02-14 12:51:34,103 INFO SenderThread:19614 [sender.py:send_exit():589] handling exit code: 0
1670
+ 2024-02-14 12:51:34,103 INFO SenderThread:19614 [sender.py:send_exit():591] handling runtime: 1831
1671
+ 2024-02-14 12:51:34,103 INFO SenderThread:19614 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
1672
+ 2024-02-14 12:51:34,104 INFO SenderThread:19614 [sender.py:send_exit():597] send defer
1673
+ 2024-02-14 12:51:34,104 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: defer
1674
+ 2024-02-14 12:51:34,104 INFO HandlerThread:19614 [handler.py:handle_request_defer():172] handle defer: 0
1675
+ 2024-02-14 12:51:34,104 DEBUG SenderThread:19614 [sender.py:send_request():409] send_request: defer
1676
+ 2024-02-14 12:51:34,104 INFO SenderThread:19614 [sender.py:send_request_defer():613] handle sender defer: 0
1677
+ 2024-02-14 12:51:34,104 INFO SenderThread:19614 [sender.py:transition_state():617] send defer: 1
1678
+ 2024-02-14 12:51:34,104 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: defer
1679
+ 2024-02-14 12:51:34,104 INFO HandlerThread:19614 [handler.py:handle_request_defer():172] handle defer: 1
1680
+ 2024-02-14 12:51:34,105 DEBUG SenderThread:19614 [sender.py:send_request():409] send_request: defer
1681
+ 2024-02-14 12:51:34,105 INFO SenderThread:19614 [sender.py:send_request_defer():613] handle sender defer: 1
1682
+ 2024-02-14 12:51:34,105 INFO SenderThread:19614 [sender.py:transition_state():617] send defer: 2
1683
+ 2024-02-14 12:51:34,105 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: defer
1684
+ 2024-02-14 12:51:34,105 INFO HandlerThread:19614 [handler.py:handle_request_defer():172] handle defer: 2
1685
+ 2024-02-14 12:51:34,105 DEBUG SenderThread:19614 [sender.py:send_request():409] send_request: defer
1686
+ 2024-02-14 12:51:34,105 INFO SenderThread:19614 [sender.py:send_request_defer():613] handle sender defer: 2
1687
+ 2024-02-14 12:51:34,105 INFO SenderThread:19614 [sender.py:transition_state():617] send defer: 3
1688
+ 2024-02-14 12:51:34,105 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: defer
1689
+ 2024-02-14 12:51:34,105 INFO HandlerThread:19614 [handler.py:handle_request_defer():172] handle defer: 3
1690
+ 2024-02-14 12:51:34,105 DEBUG SenderThread:19614 [sender.py:send_request():409] send_request: defer
1691
+ 2024-02-14 12:51:34,105 INFO SenderThread:19614 [sender.py:send_request_defer():613] handle sender defer: 3
1692
+ 2024-02-14 12:51:34,105 INFO SenderThread:19614 [sender.py:transition_state():617] send defer: 4
1693
+ 2024-02-14 12:51:34,105 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: defer
1694
+ 2024-02-14 12:51:34,105 INFO HandlerThread:19614 [handler.py:handle_request_defer():172] handle defer: 4
1695
+ 2024-02-14 12:51:34,106 DEBUG SenderThread:19614 [sender.py:send_request():409] send_request: defer
1696
+ 2024-02-14 12:51:34,106 INFO SenderThread:19614 [sender.py:send_request_defer():613] handle sender defer: 4
1697
+ 2024-02-14 12:51:34,106 INFO SenderThread:19614 [sender.py:transition_state():617] send defer: 5
1698
+ 2024-02-14 12:51:34,106 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: defer
1699
+ 2024-02-14 12:51:34,106 INFO HandlerThread:19614 [handler.py:handle_request_defer():172] handle defer: 5
1700
+ 2024-02-14 12:51:34,106 DEBUG SenderThread:19614 [sender.py:send():382] send: summary
1701
+ 2024-02-14 12:51:34,106 INFO SenderThread:19614 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
1702
+ 2024-02-14 12:51:34,107 DEBUG SenderThread:19614 [sender.py:send_request():409] send_request: defer
1703
+ 2024-02-14 12:51:34,107 INFO SenderThread:19614 [sender.py:send_request_defer():613] handle sender defer: 5
1704
+ 2024-02-14 12:51:34,107 INFO SenderThread:19614 [sender.py:transition_state():617] send defer: 6
1705
+ 2024-02-14 12:51:34,107 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: defer
1706
+ 2024-02-14 12:51:34,107 INFO HandlerThread:19614 [handler.py:handle_request_defer():172] handle defer: 6
1707
+ 2024-02-14 12:51:34,107 DEBUG SenderThread:19614 [sender.py:send_request():409] send_request: defer
1708
+ 2024-02-14 12:51:34,107 INFO SenderThread:19614 [sender.py:send_request_defer():613] handle sender defer: 6
1709
+ 2024-02-14 12:51:34,107 INFO SenderThread:19614 [sender.py:transition_state():617] send defer: 7
1710
+ 2024-02-14 12:51:34,107 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: status_report
1711
+ 2024-02-14 12:51:34,107 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: defer
1712
+ 2024-02-14 12:51:34,107 INFO HandlerThread:19614 [handler.py:handle_request_defer():172] handle defer: 7
1713
+ 2024-02-14 12:51:34,108 DEBUG SenderThread:19614 [sender.py:send_request():409] send_request: defer
1714
+ 2024-02-14 12:51:34,108 INFO SenderThread:19614 [sender.py:send_request_defer():613] handle sender defer: 7
1715
+ 2024-02-14 12:51:34,914 INFO Thread-12 :19614 [dir_watcher.py:_on_file_modified():288] file/dir modified: /content/wandb/run-20240214_115405-518dheci/files/wandb-summary.json
1716
+ 2024-02-14 12:51:35,103 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: poll_exit
1717
+ 2024-02-14 12:51:35,320 INFO SenderThread:19614 [sender.py:transition_state():617] send defer: 8
1718
+ 2024-02-14 12:51:35,321 DEBUG SenderThread:19614 [sender.py:send_request():409] send_request: poll_exit
1719
+ 2024-02-14 12:51:35,321 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: defer
1720
+ 2024-02-14 12:51:35,321 INFO HandlerThread:19614 [handler.py:handle_request_defer():172] handle defer: 8
1721
+ 2024-02-14 12:51:35,321 DEBUG SenderThread:19614 [sender.py:send_request():409] send_request: defer
1722
+ 2024-02-14 12:51:35,321 INFO SenderThread:19614 [sender.py:send_request_defer():613] handle sender defer: 8
1723
+ 2024-02-14 12:51:35,322 INFO SenderThread:19614 [job_builder.py:build():296] Attempting to build job artifact
1724
+ 2024-02-14 12:51:35,323 INFO SenderThread:19614 [job_builder.py:_get_source_type():437] no source found
1725
+ 2024-02-14 12:51:35,323 INFO SenderThread:19614 [sender.py:transition_state():617] send defer: 9
1726
+ 2024-02-14 12:51:35,323 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: defer
1727
+ 2024-02-14 12:51:35,323 INFO HandlerThread:19614 [handler.py:handle_request_defer():172] handle defer: 9
1728
+ 2024-02-14 12:51:35,323 DEBUG SenderThread:19614 [sender.py:send_request():409] send_request: defer
1729
+ 2024-02-14 12:51:35,323 INFO SenderThread:19614 [sender.py:send_request_defer():613] handle sender defer: 9
1730
+ 2024-02-14 12:51:35,323 INFO SenderThread:19614 [dir_watcher.py:finish():358] shutting down directory watcher
1731
+ 2024-02-14 12:51:35,914 INFO Thread-12 :19614 [dir_watcher.py:_on_file_modified():288] file/dir modified: /content/wandb/run-20240214_115405-518dheci/files/output.log
1732
+ 2024-02-14 12:51:35,915 INFO SenderThread:19614 [dir_watcher.py:finish():388] scan: /content/wandb/run-20240214_115405-518dheci/files
1733
+ 2024-02-14 12:51:35,915 INFO SenderThread:19614 [dir_watcher.py:finish():402] scan save: /content/wandb/run-20240214_115405-518dheci/files/config.yaml config.yaml
1734
+ 2024-02-14 12:51:35,915 INFO SenderThread:19614 [dir_watcher.py:finish():402] scan save: /content/wandb/run-20240214_115405-518dheci/files/requirements.txt requirements.txt
1735
+ 2024-02-14 12:51:35,916 INFO SenderThread:19614 [dir_watcher.py:finish():402] scan save: /content/wandb/run-20240214_115405-518dheci/files/output.log output.log
1736
+ 2024-02-14 12:51:35,922 INFO SenderThread:19614 [dir_watcher.py:finish():402] scan save: /content/wandb/run-20240214_115405-518dheci/files/wandb-metadata.json wandb-metadata.json
1737
+ 2024-02-14 12:51:35,922 INFO SenderThread:19614 [dir_watcher.py:finish():402] scan save: /content/wandb/run-20240214_115405-518dheci/files/wandb-summary.json wandb-summary.json
1738
+ 2024-02-14 12:51:35,924 INFO SenderThread:19614 [sender.py:transition_state():617] send defer: 10
1739
+ 2024-02-14 12:51:35,926 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: defer
1740
+ 2024-02-14 12:51:35,926 INFO HandlerThread:19614 [handler.py:handle_request_defer():172] handle defer: 10
1741
+ 2024-02-14 12:51:35,927 DEBUG SenderThread:19614 [sender.py:send_request():409] send_request: defer
1742
+ 2024-02-14 12:51:35,927 INFO SenderThread:19614 [sender.py:send_request_defer():613] handle sender defer: 10
1743
+ 2024-02-14 12:51:35,927 INFO SenderThread:19614 [file_pusher.py:finish():172] shutting down file pusher
1744
+ 2024-02-14 12:51:36,104 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: poll_exit
1745
+ 2024-02-14 12:51:36,104 DEBUG SenderThread:19614 [sender.py:send_request():409] send_request: poll_exit
1746
+ 2024-02-14 12:51:36,344 INFO wandb-upload_0:19614 [upload_job.py:push():131] Uploaded file /content/wandb/run-20240214_115405-518dheci/files/config.yaml
1747
+ 2024-02-14 12:51:36,381 INFO wandb-upload_3:19614 [upload_job.py:push():131] Uploaded file /content/wandb/run-20240214_115405-518dheci/files/wandb-summary.json
1748
+ 2024-02-14 12:51:36,386 INFO wandb-upload_1:19614 [upload_job.py:push():131] Uploaded file /content/wandb/run-20240214_115405-518dheci/files/requirements.txt
1749
+ 2024-02-14 12:51:36,484 INFO wandb-upload_2:19614 [upload_job.py:push():131] Uploaded file /content/wandb/run-20240214_115405-518dheci/files/output.log
1750
+ 2024-02-14 12:51:36,684 INFO Thread-11 (_thread_body):19614 [sender.py:transition_state():617] send defer: 11
1751
+ 2024-02-14 12:51:36,685 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: defer
1752
+ 2024-02-14 12:51:36,685 INFO HandlerThread:19614 [handler.py:handle_request_defer():172] handle defer: 11
1753
+ 2024-02-14 12:51:36,685 DEBUG SenderThread:19614 [sender.py:send_request():409] send_request: defer
1754
+ 2024-02-14 12:51:36,686 INFO SenderThread:19614 [sender.py:send_request_defer():613] handle sender defer: 11
1755
+ 2024-02-14 12:51:36,686 INFO SenderThread:19614 [file_pusher.py:join():178] waiting for file pusher
1756
+ 2024-02-14 12:51:36,686 INFO SenderThread:19614 [sender.py:transition_state():617] send defer: 12
1757
+ 2024-02-14 12:51:36,686 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: defer
1758
+ 2024-02-14 12:51:36,686 INFO HandlerThread:19614 [handler.py:handle_request_defer():172] handle defer: 12
1759
+ 2024-02-14 12:51:36,687 DEBUG SenderThread:19614 [sender.py:send_request():409] send_request: defer
1760
+ 2024-02-14 12:51:36,687 INFO SenderThread:19614 [sender.py:send_request_defer():613] handle sender defer: 12
1761
+ 2024-02-14 12:51:36,687 INFO SenderThread:19614 [file_stream.py:finish():595] file stream finish called
1762
+ 2024-02-14 12:51:36,887 INFO SenderThread:19614 [file_stream.py:finish():599] file stream finish is done
1763
+ 2024-02-14 12:51:36,887 INFO SenderThread:19614 [sender.py:transition_state():617] send defer: 13
1764
+ 2024-02-14 12:51:36,887 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: defer
1765
+ 2024-02-14 12:51:36,887 INFO HandlerThread:19614 [handler.py:handle_request_defer():172] handle defer: 13
1766
+ 2024-02-14 12:51:36,887 DEBUG SenderThread:19614 [sender.py:send_request():409] send_request: defer
1767
+ 2024-02-14 12:51:36,887 INFO SenderThread:19614 [sender.py:send_request_defer():613] handle sender defer: 13
1768
+ 2024-02-14 12:51:36,887 INFO SenderThread:19614 [sender.py:transition_state():617] send defer: 14
1769
+ 2024-02-14 12:51:36,888 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: defer
1770
+ 2024-02-14 12:51:36,888 INFO HandlerThread:19614 [handler.py:handle_request_defer():172] handle defer: 14
1771
+ 2024-02-14 12:51:36,888 DEBUG SenderThread:19614 [sender.py:send():382] send: final
1772
+ 2024-02-14 12:51:36,888 DEBUG SenderThread:19614 [sender.py:send():382] send: footer
1773
+ 2024-02-14 12:51:36,888 DEBUG SenderThread:19614 [sender.py:send_request():409] send_request: defer
1774
+ 2024-02-14 12:51:36,888 INFO SenderThread:19614 [sender.py:send_request_defer():613] handle sender defer: 14
1775
+ 2024-02-14 12:51:36,889 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: poll_exit
1776
+ 2024-02-14 12:51:36,889 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: poll_exit
1777
+ 2024-02-14 12:51:36,889 DEBUG SenderThread:19614 [sender.py:send_request():409] send_request: poll_exit
1778
+ 2024-02-14 12:51:36,890 DEBUG SenderThread:19614 [sender.py:send_request():409] send_request: poll_exit
1779
+ 2024-02-14 12:51:36,890 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: server_info
1780
+ 2024-02-14 12:51:36,890 DEBUG SenderThread:19614 [sender.py:send_request():409] send_request: server_info
1781
+ 2024-02-14 12:51:36,891 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: get_summary
1782
+ 2024-02-14 12:51:36,893 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: sampled_history
1783
+ 2024-02-14 12:51:36,894 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: internal_messages
1784
+ 2024-02-14 12:51:36,894 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: job_info
1785
+ 2024-02-14 12:51:36,951 DEBUG SenderThread:19614 [sender.py:send_request():409] send_request: job_info
1786
+ 2024-02-14 12:51:36,951 INFO MainThread:19614 [wandb_run.py:_footer_history_summary_info():3866] rendering history
1787
+ 2024-02-14 12:51:36,952 INFO MainThread:19614 [wandb_run.py:_footer_history_summary_info():3898] rendering summary
1788
+ 2024-02-14 12:51:36,952 INFO MainThread:19614 [wandb_run.py:_footer_sync_info():3825] logging synced files
1789
+ 2024-02-14 12:51:36,952 DEBUG HandlerThread:19614 [handler.py:handle_request():146] handle_request: shutdown
1790
+ 2024-02-14 12:51:36,952 INFO HandlerThread:19614 [handler.py:finish():869] shutting down handler
1791
+ 2024-02-14 12:51:37,894 INFO WriterThread:19614 [datastore.py:close():296] close: /content/wandb/run-20240214_115405-518dheci/run-518dheci.wandb
1792
+ 2024-02-14 12:51:37,951 INFO SenderThread:19614 [sender.py:finish():1572] shutting down sender
1793
+ 2024-02-14 12:51:37,952 INFO SenderThread:19614 [file_pusher.py:finish():172] shutting down file pusher
1794
+ 2024-02-14 12:51:37,952 INFO SenderThread:19614 [file_pusher.py:join():178] waiting for file pusher
wandb/run-20240214_115405-518dheci/logs/debug.log CHANGED
@@ -41,3 +41,5 @@ config: {}
41
  2024-02-14 12:49:36,958 INFO MainThread:4105 [wandb_init.py:_pause_backend():437] pausing backend
42
  2024-02-14 12:49:38,538 INFO MainThread:4105 [wandb_init.py:_resume_backend():442] resuming backend
43
  2024-02-14 12:49:39,001 INFO MainThread:4105 [wandb_run.py:_config_callback():1343] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'num_hidden_layers': 32, 'num_attention_heads': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'num_kv_heads': 71, 'alibi': False, 'new_decoder_architecture': False, 'multi_query': True, 'parallel_attn': True, 'bias': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['FalconForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'vilsonrodrigues/falcon-7b-instruct-sharded', 'transformers_version': '4.37.2', 'apply_residual_connection_post_layernorm': False, 'auto_map': {'AutoConfig': 'vilsonrodrigues/falcon-7b-instruct-sharded--configuration_falcon.FalconConfig', 'AutoModel': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconModel', 'AutoModelForSequenceClassification': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForSequenceClassification', 'AutoModelForTokenClassification': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForTokenClassification', 'AutoModelForQuestionAnswering': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForQuestionAnswering', 'AutoModelForCausalLM': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForCausalLM'}, 'model_type': 'falcon', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', 'load_in_8bit': False, 'load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'bnb_4bit_compute_dtype': 'float16'}, 'output_dir': './', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 3.0, 'max_steps': 250, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb14_12-49-31_f4b889916ade', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 10, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': None, 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
 
 
 
41
  2024-02-14 12:49:36,958 INFO MainThread:4105 [wandb_init.py:_pause_backend():437] pausing backend
42
  2024-02-14 12:49:38,538 INFO MainThread:4105 [wandb_init.py:_resume_backend():442] resuming backend
43
  2024-02-14 12:49:39,001 INFO MainThread:4105 [wandb_run.py:_config_callback():1343] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'num_hidden_layers': 32, 'num_attention_heads': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'num_kv_heads': 71, 'alibi': False, 'new_decoder_architecture': False, 'multi_query': True, 'parallel_attn': True, 'bias': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['FalconForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'vilsonrodrigues/falcon-7b-instruct-sharded', 'transformers_version': '4.37.2', 'apply_residual_connection_post_layernorm': False, 'auto_map': {'AutoConfig': 'vilsonrodrigues/falcon-7b-instruct-sharded--configuration_falcon.FalconConfig', 'AutoModel': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconModel', 'AutoModelForSequenceClassification': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForSequenceClassification', 'AutoModelForTokenClassification': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForTokenClassification', 'AutoModelForQuestionAnswering': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForQuestionAnswering', 'AutoModelForCausalLM': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForCausalLM'}, 'model_type': 'falcon', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', 'load_in_8bit': False, 'load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'bnb_4bit_compute_dtype': 'float16'}, 'output_dir': './', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 3.0, 'max_steps': 250, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb14_12-49-31_f4b889916ade', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 10, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': None, 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
44
+ 2024-02-14 12:51:26,419 INFO MainThread:4105 [jupyter.py:save_ipynb():373] not saving jupyter notebook
45
+ 2024-02-14 12:51:26,420 INFO MainThread:4105 [wandb_init.py:_pause_backend():437] pausing backend
wandb/run-20240214_115405-518dheci/run-518dheci.wandb CHANGED
Binary files a/wandb/run-20240214_115405-518dheci/run-518dheci.wandb and b/wandb/run-20240214_115405-518dheci/run-518dheci.wandb differ
 
wandb/run-20240214_125429-fe3s7gan/files/config.yaml ADDED
@@ -0,0 +1,660 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ python_version: 3.10.12
7
+ cli_version: 0.16.3
8
+ framework: huggingface
9
+ huggingface_version: 4.37.2
10
+ is_jupyter_run: true
11
+ is_kaggle_kernel: false
12
+ start_time: 1707915269.380721
13
+ t:
14
+ 1:
15
+ - 1
16
+ - 2
17
+ - 3
18
+ - 5
19
+ - 11
20
+ - 12
21
+ - 49
22
+ - 51
23
+ - 53
24
+ - 55
25
+ - 71
26
+ - 84
27
+ - 98
28
+ 2:
29
+ - 1
30
+ - 2
31
+ - 3
32
+ - 5
33
+ - 11
34
+ - 12
35
+ - 49
36
+ - 51
37
+ - 53
38
+ - 55
39
+ - 71
40
+ - 84
41
+ - 98
42
+ 3:
43
+ - 7
44
+ - 23
45
+ 4: 3.10.12
46
+ 5: 0.16.3
47
+ 6: 4.37.2
48
+ 8:
49
+ - 1
50
+ - 5
51
+ - 12
52
+ 9:
53
+ 1: transformers_trainer
54
+ 13: linux-x86_64
55
+ m:
56
+ - 1: train/global_step
57
+ 6:
58
+ - 3
59
+ vocab_size:
60
+ desc: null
61
+ value: 65024
62
+ hidden_size:
63
+ desc: null
64
+ value: 4544
65
+ num_hidden_layers:
66
+ desc: null
67
+ value: 32
68
+ num_attention_heads:
69
+ desc: null
70
+ value: 71
71
+ layer_norm_epsilon:
72
+ desc: null
73
+ value: 1.0e-05
74
+ initializer_range:
75
+ desc: null
76
+ value: 0.02
77
+ use_cache:
78
+ desc: null
79
+ value: false
80
+ hidden_dropout:
81
+ desc: null
82
+ value: 0.0
83
+ attention_dropout:
84
+ desc: null
85
+ value: 0.0
86
+ bos_token_id:
87
+ desc: null
88
+ value: 11
89
+ eos_token_id:
90
+ desc: null
91
+ value: 11
92
+ num_kv_heads:
93
+ desc: null
94
+ value: 71
95
+ alibi:
96
+ desc: null
97
+ value: false
98
+ new_decoder_architecture:
99
+ desc: null
100
+ value: false
101
+ multi_query:
102
+ desc: null
103
+ value: true
104
+ parallel_attn:
105
+ desc: null
106
+ value: true
107
+ bias:
108
+ desc: null
109
+ value: false
110
+ return_dict:
111
+ desc: null
112
+ value: true
113
+ output_hidden_states:
114
+ desc: null
115
+ value: false
116
+ output_attentions:
117
+ desc: null
118
+ value: false
119
+ torchscript:
120
+ desc: null
121
+ value: false
122
+ torch_dtype:
123
+ desc: null
124
+ value: bfloat16
125
+ use_bfloat16:
126
+ desc: null
127
+ value: false
128
+ tf_legacy_loss:
129
+ desc: null
130
+ value: false
131
+ pruned_heads:
132
+ desc: null
133
+ value: {}
134
+ tie_word_embeddings:
135
+ desc: null
136
+ value: true
137
+ chunk_size_feed_forward:
138
+ desc: null
139
+ value: 0
140
+ is_encoder_decoder:
141
+ desc: null
142
+ value: false
143
+ is_decoder:
144
+ desc: null
145
+ value: false
146
+ cross_attention_hidden_size:
147
+ desc: null
148
+ value: null
149
+ add_cross_attention:
150
+ desc: null
151
+ value: false
152
+ tie_encoder_decoder:
153
+ desc: null
154
+ value: false
155
+ max_length:
156
+ desc: null
157
+ value: 20
158
+ min_length:
159
+ desc: null
160
+ value: 0
161
+ do_sample:
162
+ desc: null
163
+ value: false
164
+ early_stopping:
165
+ desc: null
166
+ value: false
167
+ num_beams:
168
+ desc: null
169
+ value: 1
170
+ num_beam_groups:
171
+ desc: null
172
+ value: 1
173
+ diversity_penalty:
174
+ desc: null
175
+ value: 0.0
176
+ temperature:
177
+ desc: null
178
+ value: 1.0
179
+ top_k:
180
+ desc: null
181
+ value: 50
182
+ top_p:
183
+ desc: null
184
+ value: 1.0
185
+ typical_p:
186
+ desc: null
187
+ value: 1.0
188
+ repetition_penalty:
189
+ desc: null
190
+ value: 1.0
191
+ length_penalty:
192
+ desc: null
193
+ value: 1.0
194
+ no_repeat_ngram_size:
195
+ desc: null
196
+ value: 0
197
+ encoder_no_repeat_ngram_size:
198
+ desc: null
199
+ value: 0
200
+ bad_words_ids:
201
+ desc: null
202
+ value: null
203
+ num_return_sequences:
204
+ desc: null
205
+ value: 1
206
+ output_scores:
207
+ desc: null
208
+ value: false
209
+ return_dict_in_generate:
210
+ desc: null
211
+ value: false
212
+ forced_bos_token_id:
213
+ desc: null
214
+ value: null
215
+ forced_eos_token_id:
216
+ desc: null
217
+ value: null
218
+ remove_invalid_values:
219
+ desc: null
220
+ value: false
221
+ exponential_decay_length_penalty:
222
+ desc: null
223
+ value: null
224
+ suppress_tokens:
225
+ desc: null
226
+ value: null
227
+ begin_suppress_tokens:
228
+ desc: null
229
+ value: null
230
+ architectures:
231
+ desc: null
232
+ value:
233
+ - FalconForCausalLM
234
+ finetuning_task:
235
+ desc: null
236
+ value: null
237
+ id2label:
238
+ desc: null
239
+ value:
240
+ '0': LABEL_0
241
+ '1': LABEL_1
242
+ label2id:
243
+ desc: null
244
+ value:
245
+ LABEL_0: 0
246
+ LABEL_1: 1
247
+ tokenizer_class:
248
+ desc: null
249
+ value: null
250
+ prefix:
251
+ desc: null
252
+ value: null
253
+ pad_token_id:
254
+ desc: null
255
+ value: null
256
+ sep_token_id:
257
+ desc: null
258
+ value: null
259
+ decoder_start_token_id:
260
+ desc: null
261
+ value: null
262
+ task_specific_params:
263
+ desc: null
264
+ value: null
265
+ problem_type:
266
+ desc: null
267
+ value: null
268
+ _name_or_path:
269
+ desc: null
270
+ value: vilsonrodrigues/falcon-7b-instruct-sharded
271
+ transformers_version:
272
+ desc: null
273
+ value: 4.37.2
274
+ apply_residual_connection_post_layernorm:
275
+ desc: null
276
+ value: false
277
+ auto_map:
278
+ desc: null
279
+ value:
280
+ AutoConfig: vilsonrodrigues/falcon-7b-instruct-sharded--configuration_falcon.FalconConfig
281
+ AutoModel: vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconModel
282
+ AutoModelForSequenceClassification: vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForSequenceClassification
283
+ AutoModelForTokenClassification: vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForTokenClassification
284
+ AutoModelForQuestionAnswering: vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForQuestionAnswering
285
+ AutoModelForCausalLM: vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForCausalLM
286
+ model_type:
287
+ desc: null
288
+ value: falcon
289
+ quantization_config:
290
+ desc: null
291
+ value:
292
+ quant_method: QuantizationMethod.BITS_AND_BYTES
293
+ load_in_8bit: false
294
+ load_in_4bit: true
295
+ llm_int8_threshold: 6.0
296
+ llm_int8_skip_modules: null
297
+ llm_int8_enable_fp32_cpu_offload: false
298
+ llm_int8_has_fp16_weight: false
299
+ bnb_4bit_quant_type: nf4
300
+ bnb_4bit_use_double_quant: true
301
+ bnb_4bit_compute_dtype: float16
302
+ output_dir:
303
+ desc: null
304
+ value: ./
305
+ overwrite_output_dir:
306
+ desc: null
307
+ value: false
308
+ do_train:
309
+ desc: null
310
+ value: false
311
+ do_eval:
312
+ desc: null
313
+ value: false
314
+ do_predict:
315
+ desc: null
316
+ value: false
317
+ evaluation_strategy:
318
+ desc: null
319
+ value: 'no'
320
+ prediction_loss_only:
321
+ desc: null
322
+ value: false
323
+ per_device_train_batch_size:
324
+ desc: null
325
+ value: 4
326
+ per_device_eval_batch_size:
327
+ desc: null
328
+ value: 8
329
+ per_gpu_train_batch_size:
330
+ desc: null
331
+ value: null
332
+ per_gpu_eval_batch_size:
333
+ desc: null
334
+ value: null
335
+ gradient_accumulation_steps:
336
+ desc: null
337
+ value: 4
338
+ eval_accumulation_steps:
339
+ desc: null
340
+ value: null
341
+ eval_delay:
342
+ desc: null
343
+ value: 0
344
+ learning_rate:
345
+ desc: null
346
+ value: 0.0002
347
+ weight_decay:
348
+ desc: null
349
+ value: 0.0
350
+ adam_beta1:
351
+ desc: null
352
+ value: 0.9
353
+ adam_beta2:
354
+ desc: null
355
+ value: 0.999
356
+ adam_epsilon:
357
+ desc: null
358
+ value: 1.0e-08
359
+ max_grad_norm:
360
+ desc: null
361
+ value: 0.3
362
+ num_train_epochs:
363
+ desc: null
364
+ value: 3.0
365
+ max_steps:
366
+ desc: null
367
+ value: 250
368
+ lr_scheduler_type:
369
+ desc: null
370
+ value: cosine
371
+ lr_scheduler_kwargs:
372
+ desc: null
373
+ value: {}
374
+ warmup_ratio:
375
+ desc: null
376
+ value: 0.03
377
+ warmup_steps:
378
+ desc: null
379
+ value: 0
380
+ log_level:
381
+ desc: null
382
+ value: passive
383
+ log_level_replica:
384
+ desc: null
385
+ value: warning
386
+ log_on_each_node:
387
+ desc: null
388
+ value: true
389
+ logging_dir:
390
+ desc: null
391
+ value: ./runs/Feb14_12-54-25_f4b889916ade
392
+ logging_strategy:
393
+ desc: null
394
+ value: steps
395
+ logging_first_step:
396
+ desc: null
397
+ value: false
398
+ logging_steps:
399
+ desc: null
400
+ value: 10
401
+ logging_nan_inf_filter:
402
+ desc: null
403
+ value: true
404
+ save_strategy:
405
+ desc: null
406
+ value: steps
407
+ save_steps:
408
+ desc: null
409
+ value: 10
410
+ save_total_limit:
411
+ desc: null
412
+ value: null
413
+ save_safetensors:
414
+ desc: null
415
+ value: true
416
+ save_on_each_node:
417
+ desc: null
418
+ value: false
419
+ save_only_model:
420
+ desc: null
421
+ value: false
422
+ no_cuda:
423
+ desc: null
424
+ value: false
425
+ use_cpu:
426
+ desc: null
427
+ value: false
428
+ use_mps_device:
429
+ desc: null
430
+ value: false
431
+ seed:
432
+ desc: null
433
+ value: 42
434
+ data_seed:
435
+ desc: null
436
+ value: null
437
+ jit_mode_eval:
438
+ desc: null
439
+ value: false
440
+ use_ipex:
441
+ desc: null
442
+ value: false
443
+ bf16:
444
+ desc: null
445
+ value: false
446
+ fp16:
447
+ desc: null
448
+ value: true
449
+ fp16_opt_level:
450
+ desc: null
451
+ value: O1
452
+ half_precision_backend:
453
+ desc: null
454
+ value: auto
455
+ bf16_full_eval:
456
+ desc: null
457
+ value: false
458
+ fp16_full_eval:
459
+ desc: null
460
+ value: false
461
+ tf32:
462
+ desc: null
463
+ value: null
464
+ local_rank:
465
+ desc: null
466
+ value: 0
467
+ ddp_backend:
468
+ desc: null
469
+ value: null
470
+ tpu_num_cores:
471
+ desc: null
472
+ value: null
473
+ tpu_metrics_debug:
474
+ desc: null
475
+ value: false
476
+ debug:
477
+ desc: null
478
+ value: []
479
+ dataloader_drop_last:
480
+ desc: null
481
+ value: false
482
+ eval_steps:
483
+ desc: null
484
+ value: null
485
+ dataloader_num_workers:
486
+ desc: null
487
+ value: 0
488
+ past_index:
489
+ desc: null
490
+ value: -1
491
+ run_name:
492
+ desc: null
493
+ value: ./
494
+ disable_tqdm:
495
+ desc: null
496
+ value: false
497
+ remove_unused_columns:
498
+ desc: null
499
+ value: true
500
+ label_names:
501
+ desc: null
502
+ value: null
503
+ load_best_model_at_end:
504
+ desc: null
505
+ value: false
506
+ metric_for_best_model:
507
+ desc: null
508
+ value: null
509
+ greater_is_better:
510
+ desc: null
511
+ value: null
512
+ ignore_data_skip:
513
+ desc: null
514
+ value: false
515
+ fsdp:
516
+ desc: null
517
+ value: []
518
+ fsdp_min_num_params:
519
+ desc: null
520
+ value: 0
521
+ fsdp_config:
522
+ desc: null
523
+ value:
524
+ min_num_params: 0
525
+ xla: false
526
+ xla_fsdp_grad_ckpt: false
527
+ fsdp_transformer_layer_cls_to_wrap:
528
+ desc: null
529
+ value: null
530
+ deepspeed:
531
+ desc: null
532
+ value: null
533
+ label_smoothing_factor:
534
+ desc: null
535
+ value: 0.0
536
+ optim:
537
+ desc: null
538
+ value: paged_adamw_32bit
539
+ optim_args:
540
+ desc: null
541
+ value: null
542
+ adafactor:
543
+ desc: null
544
+ value: false
545
+ group_by_length:
546
+ desc: null
547
+ value: true
548
+ length_column_name:
549
+ desc: null
550
+ value: length
551
+ report_to:
552
+ desc: null
553
+ value:
554
+ - tensorboard
555
+ - wandb
556
+ ddp_find_unused_parameters:
557
+ desc: null
558
+ value: null
559
+ ddp_bucket_cap_mb:
560
+ desc: null
561
+ value: null
562
+ ddp_broadcast_buffers:
563
+ desc: null
564
+ value: null
565
+ dataloader_pin_memory:
566
+ desc: null
567
+ value: true
568
+ dataloader_persistent_workers:
569
+ desc: null
570
+ value: false
571
+ skip_memory_metrics:
572
+ desc: null
573
+ value: true
574
+ use_legacy_prediction_loop:
575
+ desc: null
576
+ value: false
577
+ push_to_hub:
578
+ desc: null
579
+ value: true
580
+ resume_from_checkpoint:
581
+ desc: null
582
+ value: null
583
+ hub_model_id:
584
+ desc: null
585
+ value: null
586
+ hub_strategy:
587
+ desc: null
588
+ value: every_save
589
+ hub_token:
590
+ desc: null
591
+ value: <HUB_TOKEN>
592
+ hub_private_repo:
593
+ desc: null
594
+ value: false
595
+ hub_always_push:
596
+ desc: null
597
+ value: false
598
+ gradient_checkpointing:
599
+ desc: null
600
+ value: false
601
+ gradient_checkpointing_kwargs:
602
+ desc: null
603
+ value: null
604
+ include_inputs_for_metrics:
605
+ desc: null
606
+ value: false
607
+ fp16_backend:
608
+ desc: null
609
+ value: auto
610
+ push_to_hub_model_id:
611
+ desc: null
612
+ value: null
613
+ push_to_hub_organization:
614
+ desc: null
615
+ value: null
616
+ push_to_hub_token:
617
+ desc: null
618
+ value: <PUSH_TO_HUB_TOKEN>
619
+ mp_parameters:
620
+ desc: null
621
+ value: ''
622
+ auto_find_batch_size:
623
+ desc: null
624
+ value: false
625
+ full_determinism:
626
+ desc: null
627
+ value: false
628
+ torchdynamo:
629
+ desc: null
630
+ value: null
631
+ ray_scope:
632
+ desc: null
633
+ value: last
634
+ ddp_timeout:
635
+ desc: null
636
+ value: 1800
637
+ torch_compile:
638
+ desc: null
639
+ value: false
640
+ torch_compile_backend:
641
+ desc: null
642
+ value: null
643
+ torch_compile_mode:
644
+ desc: null
645
+ value: null
646
+ dispatch_batches:
647
+ desc: null
648
+ value: null
649
+ split_batches:
650
+ desc: null
651
+ value: false
652
+ include_tokens_per_second:
653
+ desc: null
654
+ value: false
655
+ include_num_input_tokens_seen:
656
+ desc: null
657
+ value: false
658
+ neftune_noise_alpha:
659
+ desc: null
660
+ value: null
wandb/run-20240214_125429-fe3s7gan/files/output.log ADDED
@@ -0,0 +1 @@
 
 
1
+
wandb/run-20240214_125429-fe3s7gan/files/requirements.txt ADDED
@@ -0,0 +1,503 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==1.4.0
2
+ accelerate==0.27.2
3
+ aiohttp==3.9.3
4
+ aiosignal==1.3.1
5
+ alabaster==0.7.16
6
+ albumentations==1.3.1
7
+ altair==4.2.2
8
+ annotated-types==0.6.0
9
+ anyio==3.7.1
10
+ appdirs==1.4.4
11
+ argon2-cffi-bindings==21.2.0
12
+ argon2-cffi==23.1.0
13
+ array-record==0.5.0
14
+ arviz==0.15.1
15
+ astropy==5.3.4
16
+ astunparse==1.6.3
17
+ async-timeout==4.0.3
18
+ atpublic==4.0
19
+ attrs==23.2.0
20
+ audioread==3.0.1
21
+ autograd==1.6.2
22
+ babel==2.14.0
23
+ backcall==0.2.0
24
+ beautifulsoup4==4.12.3
25
+ bidict==0.22.1
26
+ bigframes==0.20.1
27
+ bitsandbytes==0.42.0
28
+ bleach==6.1.0
29
+ blinker==1.4
30
+ blis==0.7.11
31
+ blosc2==2.0.0
32
+ bokeh==3.3.4
33
+ bqplot==0.12.42
34
+ branca==0.7.1
35
+ build==1.0.3
36
+ cachecontrol==0.14.0
37
+ cachetools==5.3.2
38
+ catalogue==2.0.10
39
+ certifi==2024.2.2
40
+ cffi==1.16.0
41
+ chardet==5.2.0
42
+ charset-normalizer==3.3.2
43
+ chex==0.1.85
44
+ click-plugins==1.1.1
45
+ click==8.1.7
46
+ cligj==0.7.2
47
+ cloudpathlib==0.16.0
48
+ cloudpickle==2.2.1
49
+ cmake==3.27.9
50
+ cmdstanpy==1.2.1
51
+ colorcet==3.0.1
52
+ colorlover==0.3.0
53
+ colour==0.1.5
54
+ community==1.0.0b1
55
+ confection==0.1.4
56
+ cons==0.4.6
57
+ contextlib2==21.6.0
58
+ contourpy==1.2.0
59
+ cryptography==42.0.2
60
+ cufflinks==0.17.3
61
+ cupy-cuda12x==12.2.0
62
+ cvxopt==1.3.2
63
+ cvxpy==1.3.3
64
+ cycler==0.12.1
65
+ cymem==2.0.8
66
+ cython==3.0.8
67
+ dask==2023.8.1
68
+ datascience==0.17.6
69
+ datasets==2.17.0
70
+ db-dtypes==1.2.0
71
+ dbus-python==1.2.18
72
+ debugpy==1.6.6
73
+ decorator==4.4.2
74
+ defusedxml==0.7.1
75
+ dill==0.3.8
76
+ diskcache==5.6.3
77
+ distributed==2023.8.1
78
+ distro==1.7.0
79
+ dlib==19.24.2
80
+ dm-tree==0.1.8
81
+ docker-pycreds==0.4.0
82
+ docstring-parser==0.15
83
+ docutils==0.18.1
84
+ dopamine-rl==4.0.6
85
+ duckdb==0.9.2
86
+ earthengine-api==0.1.389
87
+ easydict==1.11
88
+ ecos==2.0.13
89
+ editdistance==0.6.2
90
+ eerepr==0.0.4
91
+ einops==0.7.0
92
+ en-core-web-sm==3.7.1
93
+ entrypoints==0.4
94
+ et-xmlfile==1.1.0
95
+ etils==1.6.0
96
+ etuples==0.3.9
97
+ exceptiongroup==1.2.0
98
+ fastai==2.7.14
99
+ fastcore==1.5.29
100
+ fastdownload==0.0.7
101
+ fastjsonschema==2.19.1
102
+ fastprogress==1.0.3
103
+ fastrlock==0.8.2
104
+ filelock==3.13.1
105
+ fiona==1.9.5
106
+ firebase-admin==5.3.0
107
+ flask==2.2.5
108
+ flatbuffers==23.5.26
109
+ flax==0.8.1
110
+ folium==0.14.0
111
+ fonttools==4.48.1
112
+ frozendict==2.4.0
113
+ frozenlist==1.4.1
114
+ fsspec==2023.6.0
115
+ future==0.18.3
116
+ gast==0.5.4
117
+ gcsfs==2023.6.0
118
+ gdal==3.6.4
119
+ gdown==4.7.3
120
+ geemap==0.30.4
121
+ gensim==4.3.2
122
+ geocoder==1.38.1
123
+ geographiclib==2.0
124
+ geopandas==0.13.2
125
+ geopy==2.3.0
126
+ gin-config==0.5.0
127
+ gitdb==4.0.11
128
+ gitpython==3.1.41
129
+ glob2==0.7
130
+ google-ai-generativelanguage==0.4.0
131
+ google-api-core==2.11.1
132
+ google-api-python-client==2.84.0
133
+ google-auth-httplib2==0.1.1
134
+ google-auth-oauthlib==1.2.0
135
+ google-auth==2.27.0
136
+ google-cloud-aiplatform==1.39.0
137
+ google-cloud-bigquery-connection==1.12.1
138
+ google-cloud-bigquery-storage==2.24.0
139
+ google-cloud-bigquery==3.12.0
140
+ google-cloud-core==2.3.3
141
+ google-cloud-datastore==2.15.2
142
+ google-cloud-firestore==2.11.1
143
+ google-cloud-functions==1.13.3
144
+ google-cloud-iam==2.14.1
145
+ google-cloud-language==2.9.1
146
+ google-cloud-resource-manager==1.12.1
147
+ google-cloud-storage==2.8.0
148
+ google-cloud-translate==3.11.3
149
+ google-colab==1.0.0
150
+ google-crc32c==1.5.0
151
+ google-generativeai==0.3.2
152
+ google-pasta==0.2.0
153
+ google-resumable-media==2.7.0
154
+ google==2.0.3
155
+ googleapis-common-protos==1.62.0
156
+ googledrivedownloader==0.4
157
+ graphviz==0.20.1
158
+ greenlet==3.0.3
159
+ grpc-google-iam-v1==0.13.0
160
+ grpcio-status==1.48.2
161
+ grpcio==1.60.1
162
+ gspread-dataframe==3.3.1
163
+ gspread==3.4.2
164
+ gym-notices==0.0.8
165
+ gym==0.25.2
166
+ h5netcdf==1.3.0
167
+ h5py==3.9.0
168
+ holidays==0.42
169
+ holoviews==1.17.1
170
+ html5lib==1.1
171
+ httpimport==1.3.1
172
+ httplib2==0.22.0
173
+ huggingface-hub==0.20.3
174
+ humanize==4.7.0
175
+ hyperopt==0.2.7
176
+ ibis-framework==7.1.0
177
+ idna==3.6
178
+ imageio-ffmpeg==0.4.9
179
+ imageio==2.31.6
180
+ imagesize==1.4.1
181
+ imbalanced-learn==0.10.1
182
+ imgaug==0.4.0
183
+ importlib-metadata==7.0.1
184
+ importlib-resources==6.1.1
185
+ imutils==0.5.4
186
+ inflect==7.0.0
187
+ iniconfig==2.0.0
188
+ install==1.3.5
189
+ intel-openmp==2023.2.3
190
+ ipyevents==2.0.2
191
+ ipyfilechooser==0.6.0
192
+ ipykernel==5.5.6
193
+ ipyleaflet==0.18.2
194
+ ipython-genutils==0.2.0
195
+ ipython-sql==0.5.0
196
+ ipython==7.34.0
197
+ ipytree==0.2.2
198
+ ipywidgets==7.7.1
199
+ itsdangerous==2.1.2
200
+ jax==0.4.23
201
+ jaxlib==0.4.23+cuda12.cudnn89
202
+ jeepney==0.7.1
203
+ jieba==0.42.1
204
+ jinja2==3.1.3
205
+ joblib==1.3.2
206
+ jsonpickle==3.0.2
207
+ jsonschema-specifications==2023.12.1
208
+ jsonschema==4.19.2
209
+ jupyter-client==6.1.12
210
+ jupyter-console==6.1.0
211
+ jupyter-core==5.7.1
212
+ jupyter-server==1.24.0
213
+ jupyterlab-pygments==0.3.0
214
+ jupyterlab-widgets==3.0.10
215
+ kaggle==1.5.16
216
+ kagglehub==0.1.9
217
+ keras==2.15.0
218
+ keyring==23.5.0
219
+ kiwisolver==1.4.5
220
+ langcodes==3.3.0
221
+ launchpadlib==1.10.16
222
+ lazr.restfulclient==0.14.4
223
+ lazr.uri==1.0.6
224
+ lazy-loader==0.3
225
+ libclang==16.0.6
226
+ librosa==0.10.1
227
+ lida==0.0.10
228
+ lightgbm==4.1.0
229
+ linkify-it-py==2.0.3
230
+ llmx==0.0.15a0
231
+ llvmlite==0.41.1
232
+ locket==1.0.0
233
+ logical-unification==0.4.6
234
+ lxml==4.9.4
235
+ malloy==2023.1067
236
+ markdown-it-py==3.0.0
237
+ markdown==3.5.2
238
+ markupsafe==2.1.5
239
+ matplotlib-inline==0.1.6
240
+ matplotlib-venn==0.11.10
241
+ matplotlib==3.7.1
242
+ mdit-py-plugins==0.4.0
243
+ mdurl==0.1.2
244
+ minikanren==1.0.3
245
+ missingno==0.5.2
246
+ mistune==0.8.4
247
+ mizani==0.9.3
248
+ mkl==2023.2.0
249
+ ml-dtypes==0.2.0
250
+ mlxtend==0.22.0
251
+ more-itertools==10.1.0
252
+ moviepy==1.0.3
253
+ mpmath==1.3.0
254
+ msgpack==1.0.7
255
+ multidict==6.0.5
256
+ multipledispatch==1.0.0
257
+ multiprocess==0.70.16
258
+ multitasking==0.0.11
259
+ murmurhash==1.0.10
260
+ music21==9.1.0
261
+ natsort==8.4.0
262
+ nbclassic==1.0.0
263
+ nbclient==0.9.0
264
+ nbconvert==6.5.4
265
+ nbformat==5.9.2
266
+ nest-asyncio==1.6.0
267
+ networkx==3.2.1
268
+ nibabel==4.0.2
269
+ nltk==3.8.1
270
+ notebook-shim==0.2.3
271
+ notebook==6.5.5
272
+ numba==0.58.1
273
+ numexpr==2.9.0
274
+ numpy==1.25.2
275
+ oauth2client==4.1.3
276
+ oauthlib==3.2.2
277
+ opencv-contrib-python==4.8.0.76
278
+ opencv-python-headless==4.9.0.80
279
+ opencv-python==4.8.0.76
280
+ openpyxl==3.1.2
281
+ opt-einsum==3.3.0
282
+ optax==0.1.9
283
+ orbax-checkpoint==0.4.4
284
+ osqp==0.6.2.post8
285
+ packaging==23.2
286
+ pandas-datareader==0.10.0
287
+ pandas-gbq==0.19.2
288
+ pandas-stubs==1.5.3.230304
289
+ pandas==1.5.3
290
+ pandocfilters==1.5.1
291
+ panel==1.3.8
292
+ param==2.0.2
293
+ parso==0.8.3
294
+ parsy==2.1
295
+ partd==1.4.1
296
+ pathlib==1.0.1
297
+ patsy==0.5.6
298
+ peewee==3.17.1
299
+ peft==0.8.2
300
+ pexpect==4.9.0
301
+ pickleshare==0.7.5
302
+ pillow==9.4.0
303
+ pins==0.8.4
304
+ pip-tools==6.13.0
305
+ pip==23.1.2
306
+ platformdirs==4.2.0
307
+ plotly==5.15.0
308
+ plotnine==0.12.4
309
+ pluggy==1.4.0
310
+ polars==0.20.2
311
+ pooch==1.8.0
312
+ portpicker==1.5.2
313
+ prefetch-generator==1.0.3
314
+ preshed==3.0.9
315
+ prettytable==3.9.0
316
+ proglog==0.1.10
317
+ progressbar2==4.2.0
318
+ prometheus-client==0.19.0
319
+ promise==2.3
320
+ prompt-toolkit==3.0.43
321
+ prophet==1.1.5
322
+ proto-plus==1.23.0
323
+ protobuf==3.20.3
324
+ psutil==5.9.5
325
+ psycopg2==2.9.9
326
+ ptyprocess==0.7.0
327
+ py-cpuinfo==9.0.0
328
+ py4j==0.10.9.7
329
+ pyarrow-hotfix==0.6
330
+ pyarrow==15.0.0
331
+ pyasn1-modules==0.3.0
332
+ pyasn1==0.5.1
333
+ pycocotools==2.0.7
334
+ pycparser==2.21
335
+ pyct==0.5.0
336
+ pydantic-core==2.16.2
337
+ pydantic==2.6.1
338
+ pydata-google-auth==1.8.2
339
+ pydot-ng==2.0.0
340
+ pydot==1.4.2
341
+ pydotplus==2.0.2
342
+ pydrive2==1.6.3
343
+ pydrive==1.3.1
344
+ pyerfa==2.0.1.1
345
+ pygame==2.5.2
346
+ pygments==2.16.1
347
+ pygobject==3.42.1
348
+ pyjwt==2.3.0
349
+ pymc==5.7.2
350
+ pymystem3==0.2.0
351
+ pyopengl==3.1.7
352
+ pyopenssl==24.0.0
353
+ pyparsing==3.1.1
354
+ pyperclip==1.8.2
355
+ pyproj==3.6.1
356
+ pyproject-hooks==1.0.0
357
+ pyshp==2.3.1
358
+ pysocks==1.7.1
359
+ pytensor==2.14.2
360
+ pytest==7.4.4
361
+ python-apt==0.0.0
362
+ python-box==7.1.1
363
+ python-dateutil==2.8.2
364
+ python-louvain==0.16
365
+ python-slugify==8.0.4
366
+ python-utils==3.8.2
367
+ pytz==2023.4
368
+ pyviz-comms==3.0.1
369
+ pywavelets==1.5.0
370
+ pyyaml==6.0.1
371
+ pyzmq==23.2.1
372
+ qdldl==0.1.7.post0
373
+ qudida==0.0.4
374
+ ratelim==0.1.6
375
+ referencing==0.33.0
376
+ regex==2023.12.25
377
+ requests-oauthlib==1.3.1
378
+ requests==2.31.0
379
+ requirements-parser==0.5.0
380
+ rich==13.7.0
381
+ rpds-py==0.17.1
382
+ rpy2==3.4.2
383
+ rsa==4.9
384
+ safetensors==0.4.2
385
+ scikit-image==0.19.3
386
+ scikit-learn==1.2.2
387
+ scipy==1.11.4
388
+ scooby==0.9.2
389
+ scs==3.2.4.post1
390
+ seaborn==0.13.1
391
+ secretstorage==3.3.1
392
+ send2trash==1.8.2
393
+ sentencepiece==0.1.99
394
+ sentry-sdk==1.40.4
395
+ setproctitle==1.3.3
396
+ setuptools==67.7.2
397
+ shapely==2.0.2
398
+ shtab==1.6.5
399
+ six==1.16.0
400
+ sklearn-pandas==2.2.0
401
+ smart-open==6.4.0
402
+ smmap==5.0.1
403
+ sniffio==1.3.0
404
+ snowballstemmer==2.2.0
405
+ sortedcontainers==2.4.0
406
+ soundfile==0.12.1
407
+ soupsieve==2.5
408
+ soxr==0.3.7
409
+ spacy-legacy==3.0.12
410
+ spacy-loggers==1.0.5
411
+ spacy==3.7.2
412
+ sphinx==5.0.2
413
+ sphinxcontrib-applehelp==1.0.8
414
+ sphinxcontrib-devhelp==1.0.6
415
+ sphinxcontrib-htmlhelp==2.0.5
416
+ sphinxcontrib-jsmath==1.0.1
417
+ sphinxcontrib-qthelp==1.0.7
418
+ sphinxcontrib-serializinghtml==1.1.10
419
+ sqlalchemy==2.0.25
420
+ sqlglot==19.9.0
421
+ sqlparse==0.4.4
422
+ srsly==2.4.8
423
+ stanio==0.3.0
424
+ statsmodels==0.14.1
425
+ sympy==1.12
426
+ tables==3.8.0
427
+ tabulate==0.9.0
428
+ tbb==2021.11.0
429
+ tblib==3.0.0
430
+ tenacity==8.2.3
431
+ tensorboard-data-server==0.7.2
432
+ tensorboard==2.15.2
433
+ tensorflow-datasets==4.9.4
434
+ tensorflow-estimator==2.15.0
435
+ tensorflow-gcs-config==2.15.0
436
+ tensorflow-hub==0.16.1
437
+ tensorflow-io-gcs-filesystem==0.36.0
438
+ tensorflow-metadata==1.14.0
439
+ tensorflow-probability==0.23.0
440
+ tensorflow==2.15.0
441
+ tensorstore==0.1.45
442
+ termcolor==2.4.0
443
+ terminado==0.18.0
444
+ text-unidecode==1.3
445
+ textblob==0.17.1
446
+ tf-keras==2.15.0
447
+ tf-slim==1.1.0
448
+ thinc==8.2.3
449
+ threadpoolctl==3.2.0
450
+ tifffile==2024.1.30
451
+ tinycss2==1.2.1
452
+ tokenizers==0.15.1
453
+ toml==0.10.2
454
+ tomli==2.0.1
455
+ toolz==0.12.1
456
+ torch==2.1.0+cu121
457
+ torchaudio==2.1.0+cu121
458
+ torchdata==0.7.0
459
+ torchsummary==1.5.1
460
+ torchtext==0.16.0
461
+ torchvision==0.16.0+cu121
462
+ tornado==6.3.2
463
+ tqdm==4.66.1
464
+ traitlets==5.7.1
465
+ traittypes==0.2.1
466
+ transformers==4.37.2
467
+ triton==2.1.0
468
+ trl==0.7.10
469
+ tweepy==4.14.0
470
+ typer==0.9.0
471
+ types-pytz==2024.1.0.20240203
472
+ types-setuptools==69.0.0.20240125
473
+ typing-extensions==4.9.0
474
+ tyro==0.7.2
475
+ tzlocal==5.2
476
+ uc-micro-py==1.0.3
477
+ uritemplate==4.1.1
478
+ urllib3==2.0.7
479
+ vega-datasets==0.9.0
480
+ wadllib==1.3.6
481
+ wandb==0.16.3
482
+ wasabi==1.1.2
483
+ wcwidth==0.2.13
484
+ weasel==0.3.4
485
+ webcolors==1.13
486
+ webencodings==0.5.1
487
+ websocket-client==1.7.0
488
+ werkzeug==3.0.1
489
+ wheel==0.42.0
490
+ widgetsnbextension==3.6.6
491
+ wordcloud==1.9.3
492
+ wrapt==1.14.1
493
+ xarray-einstats==0.7.0
494
+ xarray==2023.7.0
495
+ xgboost==2.0.3
496
+ xlrd==2.0.1
497
+ xxhash==3.4.1
498
+ xyzservices==2023.10.1
499
+ yarl==1.9.4
500
+ yellowbrick==1.5
501
+ yfinance==0.2.36
502
+ zict==3.0.0
503
+ zipp==3.17.0
wandb/run-20240214_125429-fe3s7gan/files/wandb-metadata.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.1.58+-x86_64-with-glibc2.35",
3
+ "python": "3.10.12",
4
+ "heartbeatAt": "2024-02-14T12:54:30.832517",
5
+ "startedAt": "2024-02-14T12:54:29.373808",
6
+ "docker": null,
7
+ "cuda": null,
8
+ "args": [],
9
+ "state": "running",
10
+ "program": "Smart_Agriculture_Falcon.ipynb",
11
+ "codePathLocal": null,
12
+ "colab": "https://colab.research.google.com/notebook#fileId=1kv2VO7q0c8xX_DQt5NERLeJJZtHW6zHO",
13
+ "host": "f4b889916ade",
14
+ "username": "root",
15
+ "executable": "/usr/bin/python3",
16
+ "cpu_count": 4,
17
+ "cpu_count_logical": 8,
18
+ "cpu_freq": {
19
+ "current": 2299.998,
20
+ "min": 0.0,
21
+ "max": 0.0
22
+ },
23
+ "cpu_freq_per_core": [
24
+ {
25
+ "current": 2299.998,
26
+ "min": 0.0,
27
+ "max": 0.0
28
+ },
29
+ {
30
+ "current": 2299.998,
31
+ "min": 0.0,
32
+ "max": 0.0
33
+ },
34
+ {
35
+ "current": 2299.998,
36
+ "min": 0.0,
37
+ "max": 0.0
38
+ },
39
+ {
40
+ "current": 2299.998,
41
+ "min": 0.0,
42
+ "max": 0.0
43
+ },
44
+ {
45
+ "current": 2299.998,
46
+ "min": 0.0,
47
+ "max": 0.0
48
+ },
49
+ {
50
+ "current": 2299.998,
51
+ "min": 0.0,
52
+ "max": 0.0
53
+ },
54
+ {
55
+ "current": 2299.998,
56
+ "min": 0.0,
57
+ "max": 0.0
58
+ },
59
+ {
60
+ "current": 2299.998,
61
+ "min": 0.0,
62
+ "max": 0.0
63
+ }
64
+ ],
65
+ "disk": {
66
+ "/": {
67
+ "total": 166.77415084838867,
68
+ "used": 66.21718978881836
69
+ }
70
+ },
71
+ "gpu": "Tesla T4",
72
+ "gpu_count": 1,
73
+ "gpu_devices": [
74
+ {
75
+ "name": "Tesla T4",
76
+ "memory_total": 16106127360
77
+ }
78
+ ],
79
+ "memory": {
80
+ "total": 50.993690490722656
81
+ }
82
+ }
wandb/run-20240214_125429-fe3s7gan/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"train/loss": 2.3092, "train/learning_rate": 0.00019996629653035126, "train/epoch": 0.8, "train/global_step": 10, "_timestamp": 1707915351.0395215, "_runtime": 81.65880036354065, "_step": 0}
wandb/run-20240214_125429-fe3s7gan/logs/debug-internal.log ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-02-14 12:54:29,380 INFO StreamThr :35088 [internal.py:wandb_internal():86] W&B internal server running at pid: 35088, started at: 2024-02-14 12:54:29.379470
2
+ 2024-02-14 12:54:29,381 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: status
3
+ 2024-02-14 12:54:30,447 INFO WriterThread:35088 [datastore.py:open_for_write():87] open: /content/wandb/run-20240214_125429-fe3s7gan/run-fe3s7gan.wandb
4
+ 2024-02-14 12:54:30,447 DEBUG SenderThread:35088 [sender.py:send():382] send: header
5
+ 2024-02-14 12:54:30,450 DEBUG SenderThread:35088 [sender.py:send():382] send: run
6
+ 2024-02-14 12:54:30,674 INFO SenderThread:35088 [dir_watcher.py:__init__():211] watching files in: /content/wandb/run-20240214_125429-fe3s7gan/files
7
+ 2024-02-14 12:54:30,674 INFO SenderThread:35088 [sender.py:_start_run_threads():1136] run started: fe3s7gan with start time 1707915269.380721
8
+ 2024-02-14 12:54:30,681 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: check_version
9
+ 2024-02-14 12:54:30,682 DEBUG SenderThread:35088 [sender.py:send_request():409] send_request: check_version
10
+ 2024-02-14 12:54:30,815 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: run_start
11
+ 2024-02-14 12:54:30,824 DEBUG HandlerThread:35088 [system_info.py:__init__():27] System info init
12
+ 2024-02-14 12:54:30,824 DEBUG HandlerThread:35088 [system_info.py:__init__():42] System info init done
13
+ 2024-02-14 12:54:30,824 INFO HandlerThread:35088 [system_monitor.py:start():194] Starting system monitor
14
+ 2024-02-14 12:54:30,824 INFO SystemMonitor:35088 [system_monitor.py:_start():158] Starting system asset monitoring threads
15
+ 2024-02-14 12:54:30,825 INFO HandlerThread:35088 [system_monitor.py:probe():214] Collecting system info
16
+ 2024-02-14 12:54:30,825 INFO SystemMonitor:35088 [interfaces.py:start():190] Started cpu monitoring
17
+ 2024-02-14 12:54:30,826 INFO SystemMonitor:35088 [interfaces.py:start():190] Started disk monitoring
18
+ 2024-02-14 12:54:30,826 INFO SystemMonitor:35088 [interfaces.py:start():190] Started gpu monitoring
19
+ 2024-02-14 12:54:30,828 INFO SystemMonitor:35088 [interfaces.py:start():190] Started memory monitoring
20
+ 2024-02-14 12:54:30,828 INFO SystemMonitor:35088 [interfaces.py:start():190] Started network monitoring
21
+ 2024-02-14 12:54:30,832 DEBUG HandlerThread:35088 [system_info.py:probe():151] Probing system
22
+ 2024-02-14 12:54:30,833 DEBUG HandlerThread:35088 [gitlib.py:_init_repo():56] git repository is invalid
23
+ 2024-02-14 12:54:30,833 DEBUG HandlerThread:35088 [system_info.py:probe():199] Probing system done
24
+ 2024-02-14 12:54:30,833 DEBUG HandlerThread:35088 [system_monitor.py:probe():223] {'os': 'Linux-6.1.58+-x86_64-with-glibc2.35', 'python': '3.10.12', 'heartbeatAt': '2024-02-14T12:54:30.832517', 'startedAt': '2024-02-14T12:54:29.373808', 'docker': None, 'cuda': None, 'args': (), 'state': 'running', 'program': 'Smart_Agriculture_Falcon.ipynb', 'codePathLocal': None, 'colab': 'https://colab.research.google.com/notebook#fileId=1kv2VO7q0c8xX_DQt5NERLeJJZtHW6zHO', 'host': 'f4b889916ade', 'username': 'root', 'executable': '/usr/bin/python3', 'cpu_count': 4, 'cpu_count_logical': 8, 'cpu_freq': {'current': 2299.998, 'min': 0.0, 'max': 0.0}, 'cpu_freq_per_core': [{'current': 2299.998, 'min': 0.0, 'max': 0.0}, {'current': 2299.998, 'min': 0.0, 'max': 0.0}, {'current': 2299.998, 'min': 0.0, 'max': 0.0}, {'current': 2299.998, 'min': 0.0, 'max': 0.0}, {'current': 2299.998, 'min': 0.0, 'max': 0.0}, {'current': 2299.998, 'min': 0.0, 'max': 0.0}, {'current': 2299.998, 'min': 0.0, 'max': 0.0}, {'current': 2299.998, 'min': 0.0, 'max': 0.0}], 'disk': {'/': {'total': 166.77415084838867, 'used': 66.21718978881836}}, 'gpu': 'Tesla T4', 'gpu_count': 1, 'gpu_devices': [{'name': 'Tesla T4', 'memory_total': 16106127360}], 'memory': {'total': 50.993690490722656}}
25
+ 2024-02-14 12:54:30,834 INFO HandlerThread:35088 [system_monitor.py:probe():224] Finished collecting system info
26
+ 2024-02-14 12:54:30,834 INFO HandlerThread:35088 [system_monitor.py:probe():227] Publishing system info
27
+ 2024-02-14 12:54:30,834 INFO HandlerThread:35088 [system_monitor.py:probe():229] Finished publishing system info
28
+ 2024-02-14 12:54:30,840 DEBUG SenderThread:35088 [sender.py:send():382] send: files
29
+ 2024-02-14 12:54:30,840 INFO SenderThread:35088 [sender.py:_save_file():1403] saving file wandb-metadata.json with policy now
30
+ 2024-02-14 12:54:30,856 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: python_packages
31
+ 2024-02-14 12:54:30,856 DEBUG SenderThread:35088 [sender.py:send_request():409] send_request: python_packages
32
+ 2024-02-14 12:54:30,896 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: stop_status
33
+ 2024-02-14 12:54:30,896 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: internal_messages
34
+ 2024-02-14 12:54:30,897 DEBUG SenderThread:35088 [sender.py:send_request():409] send_request: stop_status
35
+ 2024-02-14 12:54:31,103 DEBUG SenderThread:35088 [sender.py:send():382] send: telemetry
36
+ 2024-02-14 12:54:31,103 DEBUG SenderThread:35088 [sender.py:send():382] send: config
37
+ 2024-02-14 12:54:31,104 DEBUG SenderThread:35088 [sender.py:send():382] send: metric
38
+ 2024-02-14 12:54:31,104 DEBUG SenderThread:35088 [sender.py:send():382] send: telemetry
39
+ 2024-02-14 12:54:31,104 DEBUG SenderThread:35088 [sender.py:send():382] send: metric
40
+ 2024-02-14 12:54:31,104 WARNING SenderThread:35088 [sender.py:send_metric():1354] Seen metric with glob (shouldn't happen)
41
+ 2024-02-14 12:54:31,104 DEBUG SenderThread:35088 [sender.py:send():382] send: telemetry
42
+ 2024-02-14 12:54:31,178 INFO wandb-upload_0:35088 [upload_job.py:push():131] Uploaded file /tmp/tmp668dgil2wandb/pe1jrqzh-wandb-metadata.json
43
+ 2024-02-14 12:54:31,675 INFO Thread-12 :35088 [dir_watcher.py:_on_file_created():271] file/dir created: /content/wandb/run-20240214_125429-fe3s7gan/files/requirements.txt
44
+ 2024-02-14 12:54:31,675 INFO Thread-12 :35088 [dir_watcher.py:_on_file_created():271] file/dir created: /content/wandb/run-20240214_125429-fe3s7gan/files/wandb-metadata.json
45
+ 2024-02-14 12:54:35,105 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: status_report
46
+ 2024-02-14 12:54:40,106 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: status_report
47
+ 2024-02-14 12:54:45,107 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: status_report
48
+ 2024-02-14 12:54:45,857 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: stop_status
49
+ 2024-02-14 12:54:45,857 DEBUG SenderThread:35088 [sender.py:send_request():409] send_request: stop_status
50
+ 2024-02-14 12:54:45,897 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: internal_messages
51
+ 2024-02-14 12:54:50,987 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: status_report
52
+ 2024-02-14 12:54:55,988 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: status_report
53
+ 2024-02-14 12:55:00,857 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: stop_status
54
+ 2024-02-14 12:55:00,857 DEBUG SenderThread:35088 [sender.py:send_request():409] send_request: stop_status
55
+ 2024-02-14 12:55:00,897 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: internal_messages
56
+ 2024-02-14 12:55:01,012 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: status_report
57
+ 2024-02-14 12:55:01,684 INFO Thread-12 :35088 [dir_watcher.py:_on_file_modified():288] file/dir modified: /content/wandb/run-20240214_125429-fe3s7gan/files/config.yaml
58
+ 2024-02-14 12:55:06,125 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: status_report
59
+ 2024-02-14 12:55:11,126 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: status_report
60
+ 2024-02-14 12:55:15,857 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: stop_status
61
+ 2024-02-14 12:55:15,857 DEBUG SenderThread:35088 [sender.py:send_request():409] send_request: stop_status
62
+ 2024-02-14 12:55:15,897 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: internal_messages
63
+ 2024-02-14 12:55:16,924 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: status_report
64
+ 2024-02-14 12:55:21,925 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: status_report
65
+ 2024-02-14 12:55:26,926 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: status_report
66
+ 2024-02-14 12:55:30,828 DEBUG SystemMonitor:35088 [system_monitor.py:_start():172] Starting system metrics aggregation loop
67
+ 2024-02-14 12:55:30,829 DEBUG SenderThread:35088 [sender.py:send():382] send: stats
68
+ 2024-02-14 12:55:30,857 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: stop_status
69
+ 2024-02-14 12:55:30,857 DEBUG SenderThread:35088 [sender.py:send_request():409] send_request: stop_status
70
+ 2024-02-14 12:55:30,897 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: internal_messages
71
+ 2024-02-14 12:55:31,981 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: status_report
72
+ 2024-02-14 12:55:36,982 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: status_report
73
+ 2024-02-14 12:55:41,983 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: status_report
74
+ 2024-02-14 12:55:45,857 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: stop_status
75
+ 2024-02-14 12:55:45,858 DEBUG SenderThread:35088 [sender.py:send_request():409] send_request: stop_status
76
+ 2024-02-14 12:55:45,898 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: internal_messages
77
+ 2024-02-14 12:55:47,973 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: status_report
78
+ 2024-02-14 12:55:51,040 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: partial_history
79
+ 2024-02-14 12:55:51,042 DEBUG SenderThread:35088 [sender.py:send():382] send: metric
80
+ 2024-02-14 12:55:51,042 DEBUG SenderThread:35088 [sender.py:send():382] send: metric
81
+ 2024-02-14 12:55:51,042 DEBUG SenderThread:35088 [sender.py:send():382] send: metric
82
+ 2024-02-14 12:55:51,042 DEBUG SenderThread:35088 [sender.py:send():382] send: history
83
+ 2024-02-14 12:55:51,043 DEBUG SenderThread:35088 [sender.py:send_request():409] send_request: summary_record
84
+ 2024-02-14 12:55:51,045 INFO SenderThread:35088 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
85
+ 2024-02-14 12:55:51,700 INFO Thread-12 :35088 [dir_watcher.py:_on_file_created():271] file/dir created: /content/wandb/run-20240214_125429-fe3s7gan/files/wandb-summary.json
86
+ 2024-02-14 12:55:51,701 INFO Thread-12 :35088 [dir_watcher.py:_on_file_created():271] file/dir created: /content/wandb/run-20240214_125429-fe3s7gan/files/output.log
87
+ 2024-02-14 12:55:53,082 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: status_report
88
+ 2024-02-14 12:55:53,701 INFO Thread-12 :35088 [dir_watcher.py:_on_file_modified():288] file/dir modified: /content/wandb/run-20240214_125429-fe3s7gan/files/output.log
89
+ 2024-02-14 12:55:58,083 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: status_report
90
+ 2024-02-14 12:56:00,830 DEBUG SenderThread:35088 [sender.py:send():382] send: stats
91
+ 2024-02-14 12:56:00,857 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: stop_status
92
+ 2024-02-14 12:56:00,858 DEBUG HandlerThread:35088 [handler.py:handle_request():146] handle_request: internal_messages
93
+ 2024-02-14 12:56:00,858 DEBUG SenderThread:35088 [sender.py:send_request():409] send_request: stop_status
wandb/run-20240214_125429-fe3s7gan/logs/debug.log ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-02-14 12:54:29,375 INFO MainThread:34240 [wandb_setup.py:_flush():76] Current SDK version is 0.16.3
2
+ 2024-02-14 12:54:29,375 INFO MainThread:34240 [wandb_setup.py:_flush():76] Configure stats pid to 34240
3
+ 2024-02-14 12:54:29,375 INFO MainThread:34240 [wandb_setup.py:_flush():76] Loading settings from /root/.config/wandb/settings
4
+ 2024-02-14 12:54:29,375 INFO MainThread:34240 [wandb_setup.py:_flush():76] Loading settings from /content/wandb/settings
5
+ 2024-02-14 12:54:29,375 INFO MainThread:34240 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
6
+ 2024-02-14 12:54:29,375 INFO MainThread:34240 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
+ 2024-02-14 12:54:29,375 INFO MainThread:34240 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program': '<python with no main file>'}
8
+ 2024-02-14 12:54:29,375 INFO MainThread:34240 [wandb_init.py:_log_setup():526] Logging user logs to /content/wandb/run-20240214_125429-fe3s7gan/logs/debug.log
9
+ 2024-02-14 12:54:29,375 INFO MainThread:34240 [wandb_init.py:_log_setup():527] Logging internal logs to /content/wandb/run-20240214_125429-fe3s7gan/logs/debug-internal.log
10
+ 2024-02-14 12:54:29,376 INFO MainThread:34240 [wandb_init.py:_jupyter_setup():472] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x7db6b2e92350>
11
+ 2024-02-14 12:54:29,376 INFO MainThread:34240 [wandb_init.py:init():566] calling init triggers
12
+ 2024-02-14 12:54:29,376 INFO MainThread:34240 [wandb_init.py:init():573] wandb.init called with sweep_config: {}
13
+ config: {}
14
+ 2024-02-14 12:54:29,376 INFO MainThread:34240 [wandb_init.py:init():616] starting backend
15
+ 2024-02-14 12:54:29,376 INFO MainThread:34240 [wandb_init.py:init():620] setting up manager
16
+ 2024-02-14 12:54:29,378 INFO MainThread:34240 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2024-02-14 12:54:29,380 INFO MainThread:34240 [wandb_init.py:init():628] backend started and connected
18
+ 2024-02-14 12:54:29,388 INFO MainThread:34240 [wandb_run.py:_label_probe_notebook():1295] probe notebook
19
+ 2024-02-14 12:54:30,446 INFO MainThread:34240 [wandb_init.py:init():720] updated telemetry
20
+ 2024-02-14 12:54:30,449 INFO MainThread:34240 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout
21
+ 2024-02-14 12:54:30,681 INFO MainThread:34240 [wandb_run.py:_on_init():2262] communicating current version
22
+ 2024-02-14 12:54:30,810 INFO MainThread:34240 [wandb_run.py:_on_init():2271] got version response
23
+ 2024-02-14 12:54:30,810 INFO MainThread:34240 [wandb_init.py:init():804] starting run threads in backend
24
+ 2024-02-14 12:54:30,856 INFO MainThread:34240 [wandb_run.py:_console_start():2241] atexit reg
25
+ 2024-02-14 12:54:30,857 INFO MainThread:34240 [wandb_run.py:_redirect():2096] redirect: wrap_raw
26
+ 2024-02-14 12:54:30,857 INFO MainThread:34240 [wandb_run.py:_redirect():2161] Wrapping output streams.
27
+ 2024-02-14 12:54:30,857 INFO MainThread:34240 [wandb_run.py:_redirect():2186] Redirects installed.
28
+ 2024-02-14 12:54:30,858 INFO MainThread:34240 [wandb_init.py:init():847] run started, returning control to user process
29
+ 2024-02-14 12:54:30,862 INFO MainThread:34240 [wandb_run.py:_config_callback():1343] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'num_hidden_layers': 32, 'num_attention_heads': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'num_kv_heads': 71, 'alibi': False, 'new_decoder_architecture': False, 'multi_query': True, 'parallel_attn': True, 'bias': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['FalconForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'vilsonrodrigues/falcon-7b-instruct-sharded', 'transformers_version': '4.37.2', 'apply_residual_connection_post_layernorm': False, 'auto_map': {'AutoConfig': 'vilsonrodrigues/falcon-7b-instruct-sharded--configuration_falcon.FalconConfig', 'AutoModel': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconModel', 'AutoModelForSequenceClassification': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForSequenceClassification', 'AutoModelForTokenClassification': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForTokenClassification', 'AutoModelForQuestionAnswering': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForQuestionAnswering', 'AutoModelForCausalLM': 'vilsonrodrigues/falcon-7b-instruct-sharded--modeling_falcon.FalconForCausalLM'}, 'model_type': 'falcon', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', 'load_in_8bit': False, 'load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'bnb_4bit_compute_dtype': 'float16'}, 'output_dir': './', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 3.0, 'max_steps': 250, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb14_12-54-25_f4b889916ade', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 10, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': None, 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
wandb/run-20240214_125429-fe3s7gan/run-fe3s7gan.wandb ADDED
Binary file (6.9 kB). View file