sanchit-gandhi HF staff committed on
Commit
78bd673
1 Parent(s): 79a4ae8

Training in progress, step 100

Browse files
config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "sanchit-gandhi/Mistral-7B-v0.1-6-layer",
3
+ "architectures": [
4
+ "MistralForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 1,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 4096,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 14336,
13
+ "max_position_embeddings": 32768,
14
+ "model_type": "mistral",
15
+ "num_attention_heads": 32,
16
+ "num_hidden_layers": 6,
17
+ "num_key_value_heads": 8,
18
+ "rms_norm_eps": 1e-05,
19
+ "rope_theta": 10000.0,
20
+ "sliding_window": 4096,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.36.2",
24
+ "use_cache": false,
25
+ "vocab_size": 32000
26
+ }
config_full.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Model arguments
2
+ model_name_or_path: sanchit-gandhi/Mistral-7B-v0.1-6-layer
3
+ model_revision: main
4
+ torch_dtype: bfloat16
5
+ use_flash_attention_2: true
6
+
7
+ # Data training arguments
8
+ chat_template: "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}"
9
+ dataset_mixer:
10
+ HuggingFaceH4/ultrachat_200k: 1.0
11
+ dataset_splits:
12
+ - train_sft
13
+ - test_sft
14
+ preprocessing_num_workers: 12
15
+
16
+ # SFT trainer config
17
+ bf16: true
18
+ do_eval: true
19
+ evaluation_strategy: epoch
20
+ gradient_accumulation_steps: 1
21
+ gradient_checkpointing: true
22
+ gradient_checkpointing_kwargs:
23
+ use_reentrant: False
24
+ hub_strategy: every_save
25
+ learning_rate: 2.0e-05
26
+ log_level: info
27
+ logging_steps: 5
28
+ logging_strategy: steps
29
+ lr_scheduler_type: cosine
30
+ max_seq_length: 2048
31
+ max_steps: -1
32
+ num_train_epochs: 1
33
+ output_dir: ./
34
+ overwrite_output_dir: true
35
+ per_device_eval_batch_size: 8
36
+ per_device_train_batch_size: 16
37
+ push_to_hub: true
38
+ remove_unused_columns: true
39
+ report_to:
40
+ - tensorboard
41
+ - wandb
42
+ save_strategy: "steps"
43
+ save_steps: 100
44
+ save_total_limit: 1
45
+ seed: 42
46
+ warmup_ratio: 0.1
deepspeed_zero3.yaml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ compute_environment: LOCAL_MACHINE
2
+ debug: false
3
+ deepspeed_config:
4
+ deepspeed_multinode_launcher: standard
5
+ offload_optimizer_device: none
6
+ offload_param_device: none
7
+ zero3_init_flag: true
8
+ zero3_save_16bit_model: true
9
+ zero_stage: 3
10
+ distributed_type: DEEPSPEED
11
+ downcast_bf16: 'no'
12
+ machine_rank: 0
13
+ main_training_function: main
14
+ mixed_precision: bf16
15
+ num_machines: 1
16
+ num_processes: 8
17
+ rdzv_backend: static
18
+ same_network: true
19
+ tpu_env: []
20
+ tpu_use_cluster: false
21
+ tpu_use_sudo: false
22
+ use_cpu: false
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea3988d778bcca2e85f7c4596420ea91e8b5ce55168e2a692fb57b0f4bb71c04
3
+ size 3141646744
run_sft.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding=utf-8
3
+ # Copyright 2023 The HuggingFace Inc. team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ """
17
+ Supervised fine-tuning script for decoder language models.
18
+ """
19
+
20
+ import logging
21
+ import random
22
+ import sys
23
+
24
+ import datasets
25
+ import torch
26
+ import transformers
27
+ from transformers import set_seed
28
+
29
+ from alignment import (
30
+ DataArguments,
31
+ H4ArgumentParser,
32
+ ModelArguments,
33
+ SFTConfig,
34
+ apply_chat_template,
35
+ get_checkpoint,
36
+ get_datasets,
37
+ get_kbit_device_map,
38
+ get_peft_config,
39
+ get_quantization_config,
40
+ get_tokenizer,
41
+ )
42
+ from trl import SFTTrainer
43
+
44
+
45
+ logger = logging.getLogger(__name__)
46
+
47
+
48
def main():
    """Run supervised fine-tuning of a decoder language model end to end.

    Steps, in order:
      1. Parse model, data, and training arguments with ``H4ArgumentParser``.
      2. Seed the RNGs and configure logging for this process.
      3. Look for an existing checkpoint to resume from.
      4. Load the raw datasets and render them with the chat template.
      5. Build the model-loading kwargs and the ``SFTTrainer``.
      6. Train, optionally evaluate, then save the model, metrics, and
         model card, and optionally push everything to the Hub.
    """
    parser = H4ArgumentParser((ModelArguments, DataArguments, SFTConfig))
    model_args, data_args, training_args = parser.parse()

    # Set seed for reproducibility
    set_seed(training_args.seed)

    ###############
    # Setup logging
    ###############
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )
    log_level = training_args.get_process_log_level()
    logger.setLevel(log_level)
    datasets.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.enable_default_handler()
    transformers.utils.logging.enable_explicit_format()

    # Log on each process a small summary. Report half precision for either
    # fp16 or bf16: reporting fp16 alone prints False for bf16 runs.
    half_precision = bool(training_args.fp16 or training_args.bf16)
    logger.warning(
        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
        + f" distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {half_precision}"
    )
    logger.info(f"Model parameters {model_args}")
    logger.info(f"Data parameters {data_args}")
    logger.info(f"Training/evaluation parameters {training_args}")

    # Check for last checkpoint
    last_checkpoint = get_checkpoint(training_args)
    if last_checkpoint is not None and training_args.resume_from_checkpoint is None:
        logger.info(f"Checkpoint detected, resuming training at {last_checkpoint=}.")

    ###############
    # Load datasets
    ###############
    raw_datasets = get_datasets(data_args, splits=data_args.dataset_splits)
    logger.info(
        f"Training on the following datasets and their proportions: {[split + ' : ' + str(dset.num_rows) for split, dset in raw_datasets.items()]}"
    )
    # Captured before mapping so the original columns can be dropped afterwards.
    column_names = list(raw_datasets["train"].features)

    ################
    # Load tokenizer
    ################
    tokenizer = get_tokenizer(model_args, data_args)

    #####################
    # Apply chat template
    #####################
    raw_datasets = raw_datasets.map(
        apply_chat_template,
        fn_kwargs={"tokenizer": tokenizer, "task": "sft"},
        num_proc=data_args.preprocessing_num_workers,
        remove_columns=column_names,
        desc="Applying chat template",
    )
    train_dataset = raw_datasets["train"]
    eval_dataset = raw_datasets["test"]

    with training_args.main_process_first(desc="Log a few random samples from the processed training set"):
        # Clamp the sample size: random.sample raises ValueError when asked
        # for more items than the population holds (tiny or debug datasets).
        num_samples = min(3, len(train_dataset))
        for index in random.sample(range(len(train_dataset)), num_samples):
            logger.info(f"Sample {index} of the processed training set:\n\n{train_dataset[index]['text']}")

    #######################
    # Load pretrained model
    #######################
    logger.info("*** Load pretrained model ***")
    # "auto" and None are passed through unchanged; any other value is
    # resolved to the torch dtype attribute of that name (e.g. torch.bfloat16).
    torch_dtype = (
        model_args.torch_dtype if model_args.torch_dtype in ["auto", None] else getattr(torch, model_args.torch_dtype)
    )
    quantization_config = get_quantization_config(model_args)

    model_kwargs = dict(
        revision=model_args.model_revision,
        trust_remote_code=model_args.trust_remote_code,
        use_flash_attention_2=model_args.use_flash_attention_2,
        torch_dtype=torch_dtype,
        # The KV cache is switched off whenever gradient checkpointing is on.
        use_cache=not training_args.gradient_checkpointing,
        device_map=get_kbit_device_map() if quantization_config is not None else None,
        quantization_config=quantization_config,
    )

    ########################
    # Initialize the Trainer
    ########################
    # The model is handed over as a name/path string together with
    # model_init_kwargs, so the weights are instantiated by the trainer
    # itself rather than in this script.
    trainer = SFTTrainer(
        model=model_args.model_name_or_path,
        model_init_kwargs=model_kwargs,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        dataset_text_field="text",
        max_seq_length=training_args.max_seq_length,
        tokenizer=tokenizer,
        packing=True,
        peft_config=get_peft_config(model_args),
    )
    # Logged only after the trainer exists; before this point only the
    # loading kwargs had been prepared.
    logger.info("*** Model loaded! ***")

    ###############
    # Training loop
    ###############
    logger.info("*** Train ***")
    # An explicitly requested checkpoint wins over an auto-detected one.
    if training_args.resume_from_checkpoint is not None:
        checkpoint = training_args.resume_from_checkpoint
    else:
        checkpoint = last_checkpoint
    train_result = trainer.train(resume_from_checkpoint=checkpoint)
    metrics = train_result.metrics
    metrics["train_samples"] = len(train_dataset)
    trainer.log_metrics("train", metrics)
    trainer.save_metrics("train", metrics)
    trainer.save_state()

    ##########
    # Evaluate
    ##########
    if training_args.do_eval:
        logger.info("*** Evaluate ***")
        metrics = trainer.evaluate()
        metrics["eval_samples"] = len(eval_dataset)
        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    ##################################
    # Save model and create model card
    ##################################
    logger.info("*** Save model ***")
    trainer.save_model(training_args.output_dir)
    logger.info(f"Model saved to {training_args.output_dir}")

    # Save everything else on main process
    dataset_names = list(data_args.dataset_mixer)
    kwargs = {
        "finetuned_from": model_args.model_name_or_path,
        "dataset": dataset_names,
        "dataset_tags": list(dataset_names),
        "tags": ["alignment-handbook"],
    }
    if trainer.accelerator.is_main_process:
        trainer.create_model_card(**kwargs)
        # Restore k,v cache for fast inference
        trainer.model.config.use_cache = True
        trainer.model.config.save_pretrained(training_args.output_dir)

    if training_args.push_to_hub:
        logger.info("Pushing to hub...")
        trainer.push_to_hub(**kwargs)

    logger.info("*** Training complete ***")
202
+
203
+
204
+ if __name__ == "__main__":
205
+ main()
runs/Feb01_17-38-02_ip-26-0-165-24/events.out.tfevents.1706809106.ip-26-0-165-24.237059.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16dd7fa95e3557c9f2227dd3fe7f108ee5d59052a40eeb5741db9bf340a8a65d
3
+ size 7609
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
3
+ size 493443
tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<unk>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ }
29
+ },
30
+ "additional_special_tokens": [],
31
+ "bos_token": "<s>",
32
+ "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
33
+ "clean_up_tokenization_spaces": false,
34
+ "eos_token": "</s>",
35
+ "legacy": true,
36
+ "model_max_length": 2048,
37
+ "pad_token": "</s>",
38
+ "sp_model_kwargs": {},
39
+ "spaces_between_special_tokens": false,
40
+ "tokenizer_class": "LlamaTokenizer",
41
+ "unk_token": "<unk>",
42
+ "use_default_system_prompt": false
43
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85527db3b58440cee04521e4eb775e7259e847cd1a669eaf7502bcc6b6feb0ca
3
+ size 5816
wandb/debug-internal.log ADDED
@@ -0,0 +1,385 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-02-01 17:38:28,445 INFO StreamThr :237521 [internal.py:wandb_internal():86] W&B internal server running at pid: 237521, started at: 2024-02-01 17:38:28.443368
2
+ 2024-02-01 17:38:28,446 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status
3
+ 2024-02-01 17:38:28,454 INFO WriterThread:237521 [datastore.py:open_for_write():85] open: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/run-py26nu6m.wandb
4
+ 2024-02-01 17:38:28,455 DEBUG SenderThread:237521 [sender.py:send():382] send: header
5
+ 2024-02-01 17:38:28,476 DEBUG SenderThread:237521 [sender.py:send():382] send: run
6
+ 2024-02-01 17:38:28,713 INFO SenderThread:237521 [dir_watcher.py:__init__():211] watching files in: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files
7
+ 2024-02-01 17:38:28,713 INFO SenderThread:237521 [sender.py:_start_run_threads():1136] run started: py26nu6m with start time 1706809108.451874
8
+ 2024-02-01 17:38:28,720 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: check_version
9
+ 2024-02-01 17:38:28,720 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: check_version
10
+ 2024-02-01 17:38:28,774 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: run_start
11
+ 2024-02-01 17:38:28,833 DEBUG HandlerThread:237521 [system_info.py:__init__():32] System info init
12
+ 2024-02-01 17:38:28,833 DEBUG HandlerThread:237521 [system_info.py:__init__():47] System info init done
13
+ 2024-02-01 17:38:28,833 INFO HandlerThread:237521 [system_monitor.py:start():194] Starting system monitor
14
+ 2024-02-01 17:38:28,833 INFO SystemMonitor:237521 [system_monitor.py:_start():158] Starting system asset monitoring threads
15
+ 2024-02-01 17:38:28,833 INFO HandlerThread:237521 [system_monitor.py:probe():214] Collecting system info
16
+ 2024-02-01 17:38:28,834 INFO SystemMonitor:237521 [interfaces.py:start():190] Started cpu monitoring
17
+ 2024-02-01 17:38:28,834 INFO SystemMonitor:237521 [interfaces.py:start():190] Started disk monitoring
18
+ 2024-02-01 17:38:28,834 INFO SystemMonitor:237521 [interfaces.py:start():190] Started gpu monitoring
19
+ 2024-02-01 17:38:28,836 INFO SystemMonitor:237521 [interfaces.py:start():190] Started memory monitoring
20
+ 2024-02-01 17:38:28,836 INFO SystemMonitor:237521 [interfaces.py:start():190] Started network monitoring
21
+ 2024-02-01 17:38:28,880 DEBUG HandlerThread:237521 [system_info.py:probe():196] Probing system
22
+ 2024-02-01 17:38:28,883 DEBUG HandlerThread:237521 [system_info.py:_probe_git():181] Probing git
23
+ 2024-02-01 17:38:28,908 DEBUG HandlerThread:237521 [system_info.py:_probe_git():189] Probing git done
24
+ 2024-02-01 17:38:28,908 DEBUG HandlerThread:237521 [system_info.py:probe():244] Probing system done
25
+ 2024-02-01 17:38:28,908 DEBUG HandlerThread:237521 [system_monitor.py:probe():223] {'os': 'Linux-5.15.0-1048-aws-x86_64-with-glibc2.31', 'python': '3.11.5', 'heartbeatAt': '2024-02-01T17:38:28.880477', 'startedAt': '2024-02-01T17:38:28.419493', 'docker': None, 'cuda': None, 'args': ('config_full.yaml',), 'state': 'running', 'program': '/fsx/sanchit/distil-zephyr-1.5b-ssft/run_sft.py', 'codePathLocal': 'run_sft.py', 'codePath': 'run_sft.py', 'git': {'remote': 'https://huggingface.co/sanchit-gandhi/distil-zephyr-1.5b-ssft', 'commit': '79a4ae874a71e67016ded927e7d23351e5c7dab8'}, 'email': None, 'root': '/fsx/sanchit/distil-zephyr-1.5b-ssft', 'host': 'ip-26-0-165-24', 'username': 'sanchit', 'executable': '/fsx/sanchit/miniconda3/envs/venv/bin/python', 'cpu_count': 96, 'cpu_count_logical': 96, 'cpu_freq': {'current': 2731.1230833333334, 'min': 0.0, 'max': 0.0}, 'cpu_freq_per_core': [{'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 3598.237, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 3593.987, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 3597.474, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 3587.972, 'min': 0.0, 'max': 0.0}, {'current': 3597.373, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2899.882, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 
3598.404, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 3597.582, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 
0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}], 'disk': {'/': {'total': 290.7472343444824, 'used': 57.44935989379883}}, 'gpu': 'NVIDIA H100 80GB HBM3', 'gpu_count': 8, 'gpu_devices': [{'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}], 'memory': {'total': 1999.9855346679688}}
26
+ 2024-02-01 17:38:28,908 INFO HandlerThread:237521 [system_monitor.py:probe():224] Finished collecting system info
27
+ 2024-02-01 17:38:28,908 INFO HandlerThread:237521 [system_monitor.py:probe():227] Publishing system info
28
+ 2024-02-01 17:38:28,908 DEBUG HandlerThread:237521 [system_info.py:_save_pip():52] Saving list of pip packages installed into the current environment
29
+ 2024-02-01 17:38:28,910 DEBUG HandlerThread:237521 [system_info.py:_save_pip():68] Saving pip packages done
30
+ 2024-02-01 17:38:28,910 DEBUG HandlerThread:237521 [system_info.py:_save_conda():75] Saving list of conda packages installed into the current environment
31
+ 2024-02-01 17:38:29,716 INFO Thread-12 :237521 [dir_watcher.py:_on_file_created():271] file/dir created: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/conda-environment.yaml
32
+ 2024-02-01 17:38:29,716 INFO Thread-12 :237521 [dir_watcher.py:_on_file_created():271] file/dir created: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/requirements.txt
33
+ 2024-02-01 17:38:34,420 DEBUG HandlerThread:237521 [system_info.py:_save_conda():87] Saving conda packages done
34
+ 2024-02-01 17:38:34,423 INFO HandlerThread:237521 [system_monitor.py:probe():229] Finished publishing system info
35
+ 2024-02-01 17:38:34,457 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
36
+ 2024-02-01 17:38:34,457 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: keepalive
37
+ 2024-02-01 17:38:34,458 DEBUG SenderThread:237521 [sender.py:send():382] send: files
38
+ 2024-02-01 17:38:34,458 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-metadata.json with policy now
39
+ 2024-02-01 17:38:34,465 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
40
+ 2024-02-01 17:38:34,465 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
41
+ 2024-02-01 17:38:34,466 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
42
+ 2024-02-01 17:38:34,553 DEBUG SenderThread:237521 [sender.py:send():382] send: telemetry
43
+ 2024-02-01 17:38:34,554 DEBUG SenderThread:237521 [sender.py:send():382] send: config
44
+ 2024-02-01 17:38:34,555 DEBUG SenderThread:237521 [sender.py:send():382] send: metric
45
+ 2024-02-01 17:38:34,555 DEBUG SenderThread:237521 [sender.py:send():382] send: telemetry
46
+ 2024-02-01 17:38:34,555 DEBUG SenderThread:237521 [sender.py:send():382] send: metric
47
+ 2024-02-01 17:38:34,555 WARNING SenderThread:237521 [sender.py:send_metric():1343] Seen metric with glob (shouldn't happen)
48
+ 2024-02-01 17:38:34,555 DEBUG SenderThread:237521 [sender.py:send():382] send: telemetry
49
+ 2024-02-01 17:38:34,721 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/conda-environment.yaml
50
+ 2024-02-01 17:38:34,722 INFO Thread-12 :237521 [dir_watcher.py:_on_file_created():271] file/dir created: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-metadata.json
51
+ 2024-02-01 17:38:34,722 INFO Thread-12 :237521 [dir_watcher.py:_on_file_created():271] file/dir created: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
52
+ 2024-02-01 17:38:34,796 INFO wandb-upload_0:237521 [upload_job.py:push():131] Uploaded file /tmp/tmpfs5f2n7fwandb/421ry27q-wandb-metadata.json
53
+ 2024-02-01 17:38:36,724 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
54
+ 2024-02-01 17:38:39,559 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
55
+ 2024-02-01 17:38:42,110 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
56
+ 2024-02-01 17:38:42,112 DEBUG SenderThread:237521 [sender.py:send():382] send: metric
57
+ 2024-02-01 17:38:42,112 DEBUG SenderThread:237521 [sender.py:send():382] send: metric
58
+ 2024-02-01 17:38:42,112 DEBUG SenderThread:237521 [sender.py:send():382] send: metric
59
+ 2024-02-01 17:38:42,112 DEBUG SenderThread:237521 [sender.py:send():382] send: history
60
+ 2024-02-01 17:38:42,113 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
61
+ 2024-02-01 17:38:42,116 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
62
+ 2024-02-01 17:38:42,732 INFO Thread-12 :237521 [dir_watcher.py:_on_file_created():271] file/dir created: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
63
+ 2024-02-01 17:38:44,735 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
64
+ 2024-02-01 17:38:44,770 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
65
+ 2024-02-01 17:38:45,673 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
66
+ 2024-02-01 17:38:45,674 DEBUG SenderThread:237521 [sender.py:send():382] send: history
67
+ 2024-02-01 17:38:45,674 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
68
+ 2024-02-01 17:38:45,676 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
69
+ 2024-02-01 17:38:45,737 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
70
+ 2024-02-01 17:38:46,738 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
71
+ 2024-02-01 17:38:48,741 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
72
+ 2024-02-01 17:38:49,465 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
73
+ 2024-02-01 17:38:49,465 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
74
+ 2024-02-01 17:38:49,467 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
75
+ 2024-02-01 17:38:50,190 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
76
+ 2024-02-01 17:38:50,230 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
77
+ 2024-02-01 17:38:50,232 DEBUG SenderThread:237521 [sender.py:send():382] send: history
78
+ 2024-02-01 17:38:50,232 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
79
+ 2024-02-01 17:38:50,234 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
80
+ 2024-02-01 17:38:50,745 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
81
+ 2024-02-01 17:38:50,745 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
82
+ 2024-02-01 17:38:52,747 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
83
+ 2024-02-01 17:38:54,715 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
84
+ 2024-02-01 17:38:54,716 DEBUG SenderThread:237521 [sender.py:send():382] send: history
85
+ 2024-02-01 17:38:54,716 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
86
+ 2024-02-01 17:38:54,718 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
87
+ 2024-02-01 17:38:54,751 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
88
+ 2024-02-01 17:38:54,751 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
89
+ 2024-02-01 17:38:55,617 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
90
+ 2024-02-01 17:38:56,753 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
91
+ 2024-02-01 17:38:58,756 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
92
+ 2024-02-01 17:38:59,239 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
93
+ 2024-02-01 17:38:59,240 DEBUG SenderThread:237521 [sender.py:send():382] send: history
94
+ 2024-02-01 17:38:59,240 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
95
+ 2024-02-01 17:38:59,242 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
96
+ 2024-02-01 17:38:59,758 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
97
+ 2024-02-01 17:39:00,760 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
98
+ 2024-02-01 17:39:01,049 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
99
+ 2024-02-01 17:39:01,762 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/config.yaml
100
+ 2024-02-01 17:39:02,763 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
101
+ 2024-02-01 17:39:03,754 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
102
+ 2024-02-01 17:39:03,755 DEBUG SenderThread:237521 [sender.py:send():382] send: history
103
+ 2024-02-01 17:39:03,755 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
104
+ 2024-02-01 17:39:03,757 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
105
+ 2024-02-01 17:39:03,766 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
106
+ 2024-02-01 17:39:04,465 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
107
+ 2024-02-01 17:39:04,466 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
108
+ 2024-02-01 17:39:04,466 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
109
+ 2024-02-01 17:39:04,767 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
110
+ 2024-02-01 17:39:06,478 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
111
+ 2024-02-01 17:39:06,770 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
112
+ 2024-02-01 17:39:08,293 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
113
+ 2024-02-01 17:39:08,294 DEBUG SenderThread:237521 [sender.py:send():382] send: history
114
+ 2024-02-01 17:39:08,295 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
115
+ 2024-02-01 17:39:08,297 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
116
+ 2024-02-01 17:39:08,773 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
117
+ 2024-02-01 17:39:08,774 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
118
+ 2024-02-01 17:39:10,776 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
119
+ 2024-02-01 17:39:11,923 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
120
+ 2024-02-01 17:39:12,779 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
121
+ 2024-02-01 17:39:12,828 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
122
+ 2024-02-01 17:39:12,830 DEBUG SenderThread:237521 [sender.py:send():382] send: history
123
+ 2024-02-01 17:39:12,830 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
124
+ 2024-02-01 17:39:12,832 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
125
+ 2024-02-01 17:39:13,781 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
126
+ 2024-02-01 17:39:14,782 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
127
+ 2024-02-01 17:39:16,785 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
128
+ 2024-02-01 17:39:17,363 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
129
+ 2024-02-01 17:39:17,365 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
130
+ 2024-02-01 17:39:17,366 DEBUG SenderThread:237521 [sender.py:send():382] send: history
131
+ 2024-02-01 17:39:17,367 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
132
+ 2024-02-01 17:39:17,368 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
133
+ 2024-02-01 17:39:17,788 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
134
+ 2024-02-01 17:39:18,789 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
135
+ 2024-02-01 17:39:19,465 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
136
+ 2024-02-01 17:39:19,466 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
137
+ 2024-02-01 17:39:19,467 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
138
+ 2024-02-01 17:39:20,792 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
139
+ 2024-02-01 17:39:21,909 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
140
+ 2024-02-01 17:39:21,911 DEBUG SenderThread:237521 [sender.py:send():382] send: history
141
+ 2024-02-01 17:39:21,911 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
142
+ 2024-02-01 17:39:21,913 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
143
+ 2024-02-01 17:39:22,795 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
144
+ 2024-02-01 17:39:22,796 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
145
+ 2024-02-01 17:39:22,815 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
146
+ 2024-02-01 17:39:24,798 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
147
+ 2024-02-01 17:39:26,448 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
148
+ 2024-02-01 17:39:26,450 DEBUG SenderThread:237521 [sender.py:send():382] send: history
149
+ 2024-02-01 17:39:26,450 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
150
+ 2024-02-01 17:39:26,452 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
151
+ 2024-02-01 17:39:26,802 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
152
+ 2024-02-01 17:39:26,802 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
153
+ 2024-02-01 17:39:28,269 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
154
+ 2024-02-01 17:39:28,805 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
155
+ 2024-02-01 17:39:28,836 DEBUG SystemMonitor:237521 [system_monitor.py:_start():172] Starting system metrics aggregation loop
156
+ 2024-02-01 17:39:28,850 DEBUG SenderThread:237521 [sender.py:send():382] send: stats
157
+ 2024-02-01 17:39:30,807 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
158
+ 2024-02-01 17:39:31,001 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
159
+ 2024-02-01 17:39:31,003 DEBUG SenderThread:237521 [sender.py:send():382] send: history
160
+ 2024-02-01 17:39:31,003 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
161
+ 2024-02-01 17:39:31,005 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
162
+ 2024-02-01 17:39:31,810 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
163
+ 2024-02-01 17:39:32,811 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
164
+ 2024-02-01 17:39:33,729 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
165
+ 2024-02-01 17:39:34,465 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
166
+ 2024-02-01 17:39:34,465 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
167
+ 2024-02-01 17:39:34,468 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
168
+ 2024-02-01 17:39:34,814 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
169
+ 2024-02-01 17:39:35,548 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
170
+ 2024-02-01 17:39:35,550 DEBUG SenderThread:237521 [sender.py:send():382] send: history
171
+ 2024-02-01 17:39:35,550 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
172
+ 2024-02-01 17:39:35,552 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
173
+ 2024-02-01 17:39:35,816 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
174
+ 2024-02-01 17:39:36,817 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
175
+ 2024-02-01 17:39:38,820 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
176
+ 2024-02-01 17:39:39,188 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
177
+ 2024-02-01 17:39:40,104 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
178
+ 2024-02-01 17:39:40,105 DEBUG SenderThread:237521 [sender.py:send():382] send: history
179
+ 2024-02-01 17:39:40,105 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
180
+ 2024-02-01 17:39:40,108 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
181
+ 2024-02-01 17:39:40,824 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
182
+ 2024-02-01 17:39:40,824 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
183
+ 2024-02-01 17:39:42,826 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
184
+ 2024-02-01 17:39:44,651 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
185
+ 2024-02-01 17:39:44,652 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
186
+ 2024-02-01 17:39:44,653 DEBUG SenderThread:237521 [sender.py:send():382] send: history
187
+ 2024-02-01 17:39:44,653 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
188
+ 2024-02-01 17:39:44,655 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
189
+ 2024-02-01 17:39:44,830 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
190
+ 2024-02-01 17:39:44,830 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
191
+ 2024-02-01 17:39:46,833 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
192
+ 2024-02-01 17:39:48,835 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
193
+ 2024-02-01 17:39:49,211 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
194
+ 2024-02-01 17:39:49,212 DEBUG SenderThread:237521 [sender.py:send():382] send: history
195
+ 2024-02-01 17:39:49,212 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
196
+ 2024-02-01 17:39:49,214 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
197
+ 2024-02-01 17:39:49,466 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
198
+ 2024-02-01 17:39:49,466 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
199
+ 2024-02-01 17:39:49,467 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
200
+ 2024-02-01 17:39:49,838 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
201
+ 2024-02-01 17:39:50,121 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
202
+ 2024-02-01 17:39:50,839 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
203
+ 2024-02-01 17:39:52,842 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
204
+ 2024-02-01 17:39:53,762 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
205
+ 2024-02-01 17:39:53,763 DEBUG SenderThread:237521 [sender.py:send():382] send: history
206
+ 2024-02-01 17:39:53,763 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
207
+ 2024-02-01 17:39:53,765 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
208
+ 2024-02-01 17:39:53,844 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
209
+ 2024-02-01 17:39:54,845 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
210
+ 2024-02-01 17:39:55,580 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
211
+ 2024-02-01 17:39:56,848 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
212
+ 2024-02-01 17:39:58,314 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
213
+ 2024-02-01 17:39:58,316 DEBUG SenderThread:237521 [sender.py:send():382] send: history
214
+ 2024-02-01 17:39:58,316 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
215
+ 2024-02-01 17:39:58,318 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
216
+ 2024-02-01 17:39:58,842 DEBUG SenderThread:237521 [sender.py:send():382] send: stats
217
+ 2024-02-01 17:39:58,852 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
218
+ 2024-02-01 17:39:58,852 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
219
+ 2024-02-01 17:40:00,854 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
220
+ 2024-02-01 17:40:01,035 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
221
+ 2024-02-01 17:40:02,853 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
222
+ 2024-02-01 17:40:02,855 DEBUG SenderThread:237521 [sender.py:send():382] send: history
223
+ 2024-02-01 17:40:02,855 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
224
+ 2024-02-01 17:40:02,857 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
225
+ 2024-02-01 17:40:02,858 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
226
+ 2024-02-01 17:40:02,859 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
227
+ 2024-02-01 17:40:04,465 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
228
+ 2024-02-01 17:40:04,466 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
229
+ 2024-02-01 17:40:04,467 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
230
+ 2024-02-01 17:40:04,861 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
231
+ 2024-02-01 17:40:06,498 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
232
+ 2024-02-01 17:40:06,864 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
233
+ 2024-02-01 17:40:07,408 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
234
+ 2024-02-01 17:40:07,409 DEBUG SenderThread:237521 [sender.py:send():382] send: history
235
+ 2024-02-01 17:40:07,409 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
236
+ 2024-02-01 17:40:07,411 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
237
+ 2024-02-01 17:40:07,866 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
238
+ 2024-02-01 17:40:08,867 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
239
+ 2024-02-01 17:40:10,870 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
240
+ 2024-02-01 17:40:11,953 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
241
+ 2024-02-01 17:40:11,954 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
242
+ 2024-02-01 17:40:11,955 DEBUG SenderThread:237521 [sender.py:send():382] send: history
243
+ 2024-02-01 17:40:11,956 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
244
+ 2024-02-01 17:40:11,958 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
245
+ 2024-02-01 17:40:12,874 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
246
+ 2024-02-01 17:40:12,874 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
247
+ 2024-02-01 17:40:14,876 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
248
+ 2024-02-01 17:40:16,879 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
249
+ 2024-02-01 17:40:17,215 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
250
+ 2024-02-01 17:40:18,882 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
251
+ 2024-02-01 17:40:19,958 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
252
+ 2024-02-01 17:40:19,959 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
253
+ 2024-02-01 17:40:19,959 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
254
+ 2024-02-01 17:40:22,278 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
255
+ 2024-02-01 17:40:22,888 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
256
+ 2024-02-01 17:40:24,892 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
257
+ 2024-02-01 17:40:26,895 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
258
+ 2024-02-01 17:40:27,568 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
259
+ 2024-02-01 17:40:27,569 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
260
+ 2024-02-01 17:40:27,571 DEBUG SenderThread:237521 [sender.py:send():382] send: history
261
+ 2024-02-01 17:40:27,571 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
262
+ 2024-02-01 17:40:27,573 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
263
+ 2024-02-01 17:40:27,897 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
264
+ 2024-02-01 17:40:28,844 DEBUG SenderThread:237521 [sender.py:send():382] send: stats
265
+ 2024-02-01 17:40:28,898 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
266
+ 2024-02-01 17:40:30,901 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
267
+ 2024-02-01 17:40:32,115 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
268
+ 2024-02-01 17:40:32,117 DEBUG SenderThread:237521 [sender.py:send():382] send: history
269
+ 2024-02-01 17:40:32,117 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
270
+ 2024-02-01 17:40:32,119 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
271
+ 2024-02-01 17:40:32,904 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
272
+ 2024-02-01 17:40:32,904 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
273
+ 2024-02-01 17:40:33,026 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
274
+ 2024-02-01 17:40:34,907 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
275
+ 2024-02-01 17:40:34,920 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
276
+ 2024-02-01 17:40:34,920 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
277
+ 2024-02-01 17:40:34,957 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
278
+ 2024-02-01 17:40:36,665 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
279
+ 2024-02-01 17:40:36,667 DEBUG SenderThread:237521 [sender.py:send():382] send: history
280
+ 2024-02-01 17:40:36,667 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
281
+ 2024-02-01 17:40:36,669 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
282
+ 2024-02-01 17:40:36,910 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
283
+ 2024-02-01 17:40:36,911 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
284
+ 2024-02-01 17:40:38,487 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
285
+ 2024-02-01 17:40:38,913 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
286
+ 2024-02-01 17:40:40,915 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
287
+ 2024-02-01 17:40:41,219 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
288
+ 2024-02-01 17:40:41,220 DEBUG SenderThread:237521 [sender.py:send():382] send: history
289
+ 2024-02-01 17:40:41,221 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
290
+ 2024-02-01 17:40:41,223 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
291
+ 2024-02-01 17:40:41,917 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
292
+ 2024-02-01 17:40:42,919 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
293
+ 2024-02-01 17:40:43,949 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
294
+ 2024-02-01 17:40:44,922 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
295
+ 2024-02-01 17:40:45,773 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
296
+ 2024-02-01 17:40:45,775 DEBUG SenderThread:237521 [sender.py:send():382] send: history
297
+ 2024-02-01 17:40:45,776 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
298
+ 2024-02-01 17:40:45,778 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
299
+ 2024-02-01 17:40:45,924 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
300
+ 2024-02-01 17:40:46,925 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
301
+ 2024-02-01 17:40:48,927 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
302
+ 2024-02-01 17:40:49,410 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
303
+ 2024-02-01 17:40:49,920 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
304
+ 2024-02-01 17:40:49,920 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
305
+ 2024-02-01 17:40:49,957 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
306
+ 2024-02-01 17:40:50,362 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
307
+ 2024-02-01 17:40:50,930 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
308
+ 2024-02-01 17:40:51,403 DEBUG SenderThread:237521 [sender.py:send():382] send: history
309
+ 2024-02-01 17:40:51,403 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
310
+ 2024-02-01 17:40:51,405 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
311
+ 2024-02-01 17:40:51,932 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
312
+ 2024-02-01 17:40:52,934 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
313
+ 2024-02-01 17:40:54,873 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
314
+ 2024-02-01 17:40:54,874 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
315
+ 2024-02-01 17:40:54,876 DEBUG SenderThread:237521 [sender.py:send():382] send: history
316
+ 2024-02-01 17:40:54,876 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
317
+ 2024-02-01 17:40:54,878 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
318
+ 2024-02-01 17:40:54,937 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
319
+ 2024-02-01 17:40:54,937 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
320
+ 2024-02-01 17:40:56,940 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
321
+ 2024-02-01 17:40:58,846 DEBUG SenderThread:237521 [sender.py:send():382] send: stats
322
+ 2024-02-01 17:40:58,942 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
323
+ 2024-02-01 17:40:59,420 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
324
+ 2024-02-01 17:40:59,422 DEBUG SenderThread:237521 [sender.py:send():382] send: history
325
+ 2024-02-01 17:40:59,422 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
326
+ 2024-02-01 17:40:59,424 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
327
+ 2024-02-01 17:40:59,944 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
328
+ 2024-02-01 17:41:00,330 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
329
+ 2024-02-01 17:41:00,946 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
330
+ 2024-02-01 17:41:02,948 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
331
+ 2024-02-01 17:41:03,975 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
332
+ 2024-02-01 17:41:03,976 DEBUG SenderThread:237521 [sender.py:send():382] send: history
333
+ 2024-02-01 17:41:03,976 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
334
+ 2024-02-01 17:41:03,978 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
335
+ 2024-02-01 17:41:04,920 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
336
+ 2024-02-01 17:41:04,920 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
337
+ 2024-02-01 17:41:04,952 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
338
+ 2024-02-01 17:41:04,952 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
339
+ 2024-02-01 17:41:04,957 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
340
+ 2024-02-01 17:41:05,797 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
341
+ 2024-02-01 17:41:06,955 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
342
+ 2024-02-01 17:41:08,527 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
343
+ 2024-02-01 17:41:08,529 DEBUG SenderThread:237521 [sender.py:send():382] send: history
344
+ 2024-02-01 17:41:08,529 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
345
+ 2024-02-01 17:41:08,531 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
346
+ 2024-02-01 17:41:08,958 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
347
+ 2024-02-01 17:41:08,959 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
348
+ 2024-02-01 17:41:10,961 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
349
+ 2024-02-01 17:41:11,264 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
350
+ 2024-02-01 17:41:12,964 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
351
+ 2024-02-01 17:41:13,085 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
352
+ 2024-02-01 17:41:13,086 DEBUG SenderThread:237521 [sender.py:send():382] send: history
353
+ 2024-02-01 17:41:13,087 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
354
+ 2024-02-01 17:41:13,089 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
355
+ 2024-02-01 17:41:13,966 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
356
+ 2024-02-01 17:41:14,967 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
357
+ 2024-02-01 17:41:16,736 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
358
+ 2024-02-01 17:41:16,969 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
359
+ 2024-02-01 17:41:17,649 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
360
+ 2024-02-01 17:41:17,650 DEBUG SenderThread:237521 [sender.py:send():382] send: history
361
+ 2024-02-01 17:41:17,651 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
362
+ 2024-02-01 17:41:17,653 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
363
+ 2024-02-01 17:41:17,972 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
364
+ 2024-02-01 17:41:18,973 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
365
+ 2024-02-01 17:41:19,920 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
366
+ 2024-02-01 17:41:19,920 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
367
+ 2024-02-01 17:41:19,957 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
368
+ 2024-02-01 17:41:20,976 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
369
+ 2024-02-01 17:41:22,234 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
370
+ 2024-02-01 17:41:22,235 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
371
+ 2024-02-01 17:41:22,236 DEBUG SenderThread:237521 [sender.py:send():382] send: history
372
+ 2024-02-01 17:41:22,236 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
373
+ 2024-02-01 17:41:22,238 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
374
+ 2024-02-01 17:41:22,979 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
375
+ 2024-02-01 17:41:22,979 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
376
+ 2024-02-01 17:41:24,981 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
377
+ 2024-02-01 17:41:26,803 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
378
+ 2024-02-01 17:41:26,804 DEBUG SenderThread:237521 [sender.py:send():382] send: history
379
+ 2024-02-01 17:41:26,805 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
380
+ 2024-02-01 17:41:26,806 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
381
+ 2024-02-01 17:41:26,985 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
382
+ 2024-02-01 17:41:26,985 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
383
+ 2024-02-01 17:41:27,718 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
384
+ 2024-02-01 17:41:28,848 DEBUG SenderThread:237521 [sender.py:send():382] send: stats
385
+ 2024-02-01 17:41:28,987 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
wandb/debug.log ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Current SDK version is 0.16.1
2
+ 2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Configure stats pid to 237059
3
+ 2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Loading settings from /admin/home/sanchit/.config/wandb/settings
4
+ 2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Loading settings from /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/settings
5
+ 2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
6
+ 2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
+ 2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'run_sft.py', 'program_abspath': '/fsx/sanchit/distil-zephyr-1.5b-ssft/run_sft.py', 'program': '/fsx/sanchit/distil-zephyr-1.5b-ssft/run_sft.py'}
8
+ 2024-02-01 17:38:28,435 INFO MainThread:237059 [wandb_init.py:_log_setup():524] Logging user logs to /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/logs/debug.log
9
+ 2024-02-01 17:38:28,435 INFO MainThread:237059 [wandb_init.py:_log_setup():525] Logging internal logs to /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/logs/debug-internal.log
10
+ 2024-02-01 17:38:28,435 INFO MainThread:237059 [wandb_init.py:init():564] calling init triggers
11
+ 2024-02-01 17:38:28,435 INFO MainThread:237059 [wandb_init.py:init():571] wandb.init called with sweep_config: {}
12
+ config: {}
13
+ 2024-02-01 17:38:28,435 INFO MainThread:237059 [wandb_init.py:init():614] starting backend
14
+ 2024-02-01 17:38:28,435 INFO MainThread:237059 [wandb_init.py:init():618] setting up manager
15
+ 2024-02-01 17:38:28,441 INFO MainThread:237059 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
16
+ 2024-02-01 17:38:28,451 INFO MainThread:237059 [wandb_init.py:init():624] backend started and connected
17
+ 2024-02-01 17:38:28,453 INFO MainThread:237059 [wandb_init.py:init():716] updated telemetry
18
+ 2024-02-01 17:38:28,475 INFO MainThread:237059 [wandb_init.py:init():749] communicating run to backend with 90.0 second timeout
19
+ 2024-02-01 17:38:28,720 INFO MainThread:237059 [wandb_run.py:_on_init():2254] communicating current version
20
+ 2024-02-01 17:38:28,767 INFO MainThread:237059 [wandb_run.py:_on_init():2263] got version response upgrade_message: "wandb version 0.16.2 is available! To upgrade, please run:\n $ pip install wandb --upgrade"
21
+
22
+ 2024-02-01 17:38:28,767 INFO MainThread:237059 [wandb_init.py:init():800] starting run threads in backend
23
+ 2024-02-01 17:38:34,465 INFO MainThread:237059 [wandb_run.py:_console_start():2233] atexit reg
24
+ 2024-02-01 17:38:34,465 INFO MainThread:237059 [wandb_run.py:_redirect():2088] redirect: wrap_raw
25
+ 2024-02-01 17:38:34,466 INFO MainThread:237059 [wandb_run.py:_redirect():2153] Wrapping output streams.
26
+ 2024-02-01 17:38:34,466 INFO MainThread:237059 [wandb_run.py:_redirect():2178] Redirects installed.
27
+ 2024-02-01 17:38:34,467 INFO MainThread:237059 [wandb_init.py:init():841] run started, returning control to user process
28
+ 2024-02-01 17:38:34,468 INFO MainThread:237059 [wandb_run.py:_config_callback():1342] config_cb None None {'vocab_size': 32000, 'max_position_embeddings': 32768, 'hidden_size': 4096, 'intermediate_size': 14336, 'num_hidden_layers': 6, 'num_attention_heads': 32, 'sliding_window': 4096, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'use_cache': False, 'rope_theta': 10000.0, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['MistralForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': None, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'sanchit-gandhi/Mistral-7B-v0.1-6-layer', 'transformers_version': '4.36.2', 'model_type': 'mistral', 'output_dir': './', 'overwrite_output_dir': 
True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.1, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb01_17-38-02_ip-26-0-165-24', 'logging_strategy': 'steps', 'logging_first_step': True, 'logging_steps': 5, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': 
False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': {'use_reentrant': False}, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'max_seq_length': 2048}
wandb/run-20240201_173828-py26nu6m/files/conda-environment.yaml ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: venv
2
+ channels:
3
+ - pytorch
4
+ - nvidia
5
+ - defaults
6
+ dependencies:
7
+ - _libgcc_mutex=0.1=main
8
+ - _openmp_mutex=5.1=1_gnu
9
+ - blas=1.0=mkl
10
+ - brotli-python=1.0.9=py311h6a678d5_7
11
+ - bzip2=1.0.8=h7b6447c_0
12
+ - ca-certificates=2023.12.12=h06a4308_0
13
+ - certifi=2023.11.17=py311h06a4308_0
14
+ - cffi=1.16.0=py311h5eee18b_0
15
+ - cryptography=41.0.7=py311hdda0065_0
16
+ - cuda-cudart=12.1.105=0
17
+ - cuda-cupti=12.1.105=0
18
+ - cuda-libraries=12.1.0=0
19
+ - cuda-nvrtc=12.1.105=0
20
+ - cuda-nvtx=12.1.105=0
21
+ - cuda-opencl=12.3.101=0
22
+ - cuda-runtime=12.1.0=0
23
+ - ffmpeg=4.3=hf484d3e_0
24
+ - filelock=3.13.1=py311h06a4308_0
25
+ - freetype=2.12.1=h4a9f257_0
26
+ - giflib=5.2.1=h5eee18b_3
27
+ - gmp=6.2.1=h295c915_3
28
+ - gmpy2=2.1.2=py311hc9b5ff0_0
29
+ - gnutls=3.6.15=he1e5248_0
30
+ - intel-openmp=2023.1.0=hdb19cb5_46306
31
+ - jinja2=3.1.2=py311h06a4308_0
32
+ - jpeg=9e=h5eee18b_1
33
+ - lame=3.100=h7b6447c_0
34
+ - lcms2=2.12=h3be6417_0
35
+ - ld_impl_linux-64=2.38=h1181459_1
36
+ - lerc=3.0=h295c915_0
37
+ - libcublas=12.1.0.26=0
38
+ - libcufft=11.0.2.4=0
39
+ - libcufile=1.8.1.2=0
40
+ - libcurand=10.3.4.101=0
41
+ - libcusolver=11.4.4.55=0
42
+ - libcusparse=12.0.2.55=0
43
+ - libdeflate=1.17=h5eee18b_1
44
+ - libffi=3.4.4=h6a678d5_0
45
+ - libgcc-ng=11.2.0=h1234567_1
46
+ - libgomp=11.2.0=h1234567_1
47
+ - libiconv=1.16=h7f8727e_2
48
+ - libidn2=2.3.4=h5eee18b_0
49
+ - libjpeg-turbo=2.0.0=h9bf148f_0
50
+ - libnpp=12.0.2.50=0
51
+ - libnvjitlink=12.1.105=0
52
+ - libnvjpeg=12.1.1.14=0
53
+ - libpng=1.6.39=h5eee18b_0
54
+ - libstdcxx-ng=11.2.0=h1234567_1
55
+ - libtasn1=4.19.0=h5eee18b_0
56
+ - libtiff=4.5.1=h6a678d5_0
57
+ - libunistring=0.9.10=h27cfd23_0
58
+ - libuuid=1.41.5=h5eee18b_0
59
+ - libwebp=1.3.2=h11a3e52_0
60
+ - libwebp-base=1.3.2=h5eee18b_0
61
+ - llvm-openmp=14.0.6=h9e868ea_0
62
+ - lz4-c=1.9.4=h6a678d5_0
63
+ - markupsafe=2.1.1=py311h5eee18b_0
64
+ - mkl=2023.1.0=h213fc3f_46344
65
+ - mkl-service=2.4.0=py311h5eee18b_1
66
+ - mkl_fft=1.3.8=py311h5eee18b_0
67
+ - mkl_random=1.2.4=py311hdb19cb5_0
68
+ - mpc=1.1.0=h10f8cd9_1
69
+ - mpfr=4.0.2=hb69a4c5_1
70
+ - mpmath=1.3.0=py311h06a4308_0
71
+ - ncurses=6.4=h6a678d5_0
72
+ - nettle=3.7.3=hbbd107a_1
73
+ - networkx=3.1=py311h06a4308_0
74
+ - numpy=1.26.2=py311h08b1b3b_0
75
+ - numpy-base=1.26.2=py311hf175353_0
76
+ - openh264=2.1.1=h4ff587b_0
77
+ - openjpeg=2.4.0=h3ad879b_0
78
+ - openssl=3.0.12=h7f8727e_0
79
+ - pycparser=2.21=pyhd3eb1b0_0
80
+ - pyopenssl=23.2.0=py311h06a4308_0
81
+ - pysocks=1.7.1=py311h06a4308_0
82
+ - python=3.11.5=h955ad1f_0
83
+ - pytorch=2.1.2=py3.11_cuda12.1_cudnn8.9.2_0
84
+ - pytorch-cuda=12.1=ha16c6d3_5
85
+ - pytorch-mutex=1.0=cuda
86
+ - pyyaml=6.0.1=py311h5eee18b_0
87
+ - readline=8.2=h5eee18b_0
88
+ - requests=2.31.0=py311h06a4308_0
89
+ - setuptools=68.2.2=py311h06a4308_0
90
+ - sqlite=3.41.2=h5eee18b_0
91
+ - sympy=1.12=py311h06a4308_0
92
+ - tbb=2021.8.0=hdb19cb5_0
93
+ - tk=8.6.12=h1ccaba5_0
94
+ - torchaudio=2.1.2=py311_cu121
95
+ - torchtriton=2.1.0=py311
96
+ - torchvision=0.16.2=py311_cu121
97
+ - typing_extensions=4.7.1=py311h06a4308_0
98
+ - wheel=0.41.2=py311h06a4308_0
99
+ - xz=5.4.5=h5eee18b_0
100
+ - yaml=0.2.5=h7b6447c_0
101
+ - zlib=1.2.13=h5eee18b_0
102
+ - zstd=1.5.5=hc292b87_0
103
+ - pip:
104
+ - absl-py==2.0.0
105
+ - accelerate==0.23.0
106
+ - aiohttp==3.9.1
107
+ - aiosignal==1.3.1
108
+ - annotated-types==0.6.0
109
+ - appdirs==1.4.4
110
+ - astunparse==1.6.3
111
+ - attrs==23.1.0
112
+ - audioread==3.0.1
113
+ - bitsandbytes==0.41.2.post2
114
+ - cachetools==5.3.2
115
+ - chardet==5.2.0
116
+ - charset-normalizer==3.3.2
117
+ - click==8.1.7
118
+ - datasets==2.14.6
119
+ - decorator==5.1.1
120
+ - deepspeed==0.12.2
121
+ - dill==0.3.7
122
+ - docker-pycreds==0.4.0
123
+ - docstring-parser==0.15
124
+ - einops==0.7.0
125
+ - evaluate==0.4.0
126
+ - flash-attn==2.5.2
127
+ - flatbuffers==23.5.26
128
+ - frozenlist==1.4.1
129
+ - fsspec==2023.10.0
130
+ - gast==0.5.4
131
+ - gitdb==4.0.11
132
+ - gitpython==3.1.40
133
+ - google-auth==2.26.1
134
+ - google-auth-oauthlib==1.2.0
135
+ - google-pasta==0.2.0
136
+ - grpcio==1.60.0
137
+ - h5py==3.10.0
138
+ - hf-transfer==0.1.5
139
+ - hjson==3.1.0
140
+ - huggingface-hub==0.20.1
141
+ - idna==3.6
142
+ - jiwer==3.0.3
143
+ - joblib==1.3.2
144
+ - keras==2.15.0
145
+ - lazy-loader==0.3
146
+ - libclang==16.0.6
147
+ - librosa==0.10.1
148
+ - llvmlite==0.41.1
149
+ - markdown==3.5.1
150
+ - markdown-it-py==3.0.0
151
+ - mdurl==0.1.2
152
+ - ml-dtypes==0.2.0
153
+ - msgpack==1.0.7
154
+ - multidict==6.0.4
155
+ - multiprocess==0.70.15
156
+ - ninja==1.11.1.1
157
+ - nltk==3.8.1
158
+ - numba==0.58.1
159
+ - oauthlib==3.2.2
160
+ - opt-einsum==3.3.0
161
+ - packaging==23.2
162
+ - pandas==2.1.4
163
+ - peft==0.7.1
164
+ - pillow==10.2.0
165
+ - pip==23.3.2
166
+ - platformdirs==4.1.0
167
+ - pooch==1.8.0
168
+ - protobuf==3.20.2
169
+ - psutil==5.9.7
170
+ - py-cpuinfo==9.0.0
171
+ - pyarrow==14.0.2
172
+ - pyarrow-hotfix==0.6
173
+ - pyasn1==0.5.1
174
+ - pyasn1-modules==0.3.0
175
+ - pydantic==2.6.0
176
+ - pydantic-core==2.16.1
177
+ - pygments==2.17.2
178
+ - pynvml==11.5.0
179
+ - python-dateutil==2.8.2
180
+ - pytz==2023.3.post1
181
+ - rapidfuzz==3.6.1
182
+ - regex==2023.12.25
183
+ - requests-oauthlib==1.3.1
184
+ - responses==0.18.0
185
+ - rich==13.7.0
186
+ - rsa==4.9
187
+ - safetensors==0.4.1
188
+ - scikit-learn==1.3.2
189
+ - scipy==1.11.4
190
+ - sentencepiece==0.1.99
191
+ - sentry-sdk==1.39.1
192
+ - setproctitle==1.3.3
193
+ - shtab==1.6.5
194
+ - six==1.16.0
195
+ - smmap==5.0.1
196
+ - soundfile==0.12.1
197
+ - soxr==0.3.7
198
+ - tensorboard==2.15.1
199
+ - tensorboard-data-server==0.7.2
200
+ - tensorflow-cpu==2.15.0.post1
201
+ - tensorflow-estimator==2.15.0
202
+ - tensorflow-io-gcs-filesystem==0.35.0
203
+ - termcolor==2.4.0
204
+ - threadpoolctl==3.2.0
205
+ - tokenizers==0.15.0
206
+ - tqdm==4.66.1
207
+ - transformers==4.36.2
208
+ - trl==0.7.7
209
+ - typing-extensions==4.9.0
210
+ - tyro==0.7.0
211
+ - tzdata==2023.3
212
+ - urllib3==2.1.0
213
+ - wandb==0.16.1
214
+ - werkzeug==3.0.1
215
+ - wrapt==1.14.1
216
+ - xxhash==3.4.1
217
+ - yarl==1.9.4
218
+ prefix: /fsx/sanchit/miniconda3/envs/venv
wandb/run-20240201_173828-py26nu6m/files/config.yaml ADDED
@@ -0,0 +1,644 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ python_version: 3.11.5
7
+ cli_version: 0.16.1
8
+ framework: huggingface
9
+ huggingface_version: 4.36.2
10
+ is_jupyter_run: false
11
+ is_kaggle_kernel: false
12
+ start_time: 1706809108.451874
13
+ t:
14
+ 1:
15
+ - 1
16
+ - 2
17
+ - 3
18
+ - 5
19
+ - 11
20
+ - 49
21
+ - 51
22
+ - 53
23
+ - 55
24
+ - 71
25
+ - 84
26
+ - 98
27
+ 2:
28
+ - 1
29
+ - 2
30
+ - 3
31
+ - 5
32
+ - 11
33
+ - 49
34
+ - 51
35
+ - 53
36
+ - 55
37
+ - 71
38
+ - 84
39
+ - 98
40
+ 3:
41
+ - 7
42
+ - 23
43
+ 4: 3.11.5
44
+ 5: 0.16.1
45
+ 6: 4.36.2
46
+ 8:
47
+ - 5
48
+ 9:
49
+ 1: transformers_trainer
50
+ 13: linux-x86_64
51
+ m:
52
+ - 1: train/global_step
53
+ 6:
54
+ - 3
55
+ - 1: train/loss
56
+ 5: 1
57
+ 6:
58
+ - 1
59
+ - 1: train/learning_rate
60
+ 5: 1
61
+ 6:
62
+ - 1
63
+ - 1: train/epoch
64
+ 5: 1
65
+ 6:
66
+ - 1
67
+ vocab_size:
68
+ desc: null
69
+ value: 32000
70
+ max_position_embeddings:
71
+ desc: null
72
+ value: 32768
73
+ hidden_size:
74
+ desc: null
75
+ value: 4096
76
+ intermediate_size:
77
+ desc: null
78
+ value: 14336
79
+ num_hidden_layers:
80
+ desc: null
81
+ value: 6
82
+ num_attention_heads:
83
+ desc: null
84
+ value: 32
85
+ sliding_window:
86
+ desc: null
87
+ value: 4096
88
+ num_key_value_heads:
89
+ desc: null
90
+ value: 8
91
+ hidden_act:
92
+ desc: null
93
+ value: silu
94
+ initializer_range:
95
+ desc: null
96
+ value: 0.02
97
+ rms_norm_eps:
98
+ desc: null
99
+ value: 1.0e-05
100
+ use_cache:
101
+ desc: null
102
+ value: false
103
+ rope_theta:
104
+ desc: null
105
+ value: 10000.0
106
+ attention_dropout:
107
+ desc: null
108
+ value: 0.0
109
+ return_dict:
110
+ desc: null
111
+ value: true
112
+ output_hidden_states:
113
+ desc: null
114
+ value: false
115
+ output_attentions:
116
+ desc: null
117
+ value: false
118
+ torchscript:
119
+ desc: null
120
+ value: false
121
+ torch_dtype:
122
+ desc: null
123
+ value: bfloat16
124
+ use_bfloat16:
125
+ desc: null
126
+ value: false
127
+ tf_legacy_loss:
128
+ desc: null
129
+ value: false
130
+ pruned_heads:
131
+ desc: null
132
+ value: {}
133
+ tie_word_embeddings:
134
+ desc: null
135
+ value: false
136
+ is_encoder_decoder:
137
+ desc: null
138
+ value: false
139
+ is_decoder:
140
+ desc: null
141
+ value: false
142
+ cross_attention_hidden_size:
143
+ desc: null
144
+ value: null
145
+ add_cross_attention:
146
+ desc: null
147
+ value: false
148
+ tie_encoder_decoder:
149
+ desc: null
150
+ value: false
151
+ max_length:
152
+ desc: null
153
+ value: 20
154
+ min_length:
155
+ desc: null
156
+ value: 0
157
+ do_sample:
158
+ desc: null
159
+ value: false
160
+ early_stopping:
161
+ desc: null
162
+ value: false
163
+ num_beams:
164
+ desc: null
165
+ value: 1
166
+ num_beam_groups:
167
+ desc: null
168
+ value: 1
169
+ diversity_penalty:
170
+ desc: null
171
+ value: 0.0
172
+ temperature:
173
+ desc: null
174
+ value: 1.0
175
+ top_k:
176
+ desc: null
177
+ value: 50
178
+ top_p:
179
+ desc: null
180
+ value: 1.0
181
+ typical_p:
182
+ desc: null
183
+ value: 1.0
184
+ repetition_penalty:
185
+ desc: null
186
+ value: 1.0
187
+ length_penalty:
188
+ desc: null
189
+ value: 1.0
190
+ no_repeat_ngram_size:
191
+ desc: null
192
+ value: 0
193
+ encoder_no_repeat_ngram_size:
194
+ desc: null
195
+ value: 0
196
+ bad_words_ids:
197
+ desc: null
198
+ value: null
199
+ num_return_sequences:
200
+ desc: null
201
+ value: 1
202
+ chunk_size_feed_forward:
203
+ desc: null
204
+ value: 0
205
+ output_scores:
206
+ desc: null
207
+ value: false
208
+ return_dict_in_generate:
209
+ desc: null
210
+ value: false
211
+ forced_bos_token_id:
212
+ desc: null
213
+ value: null
214
+ forced_eos_token_id:
215
+ desc: null
216
+ value: null
217
+ remove_invalid_values:
218
+ desc: null
219
+ value: false
220
+ exponential_decay_length_penalty:
221
+ desc: null
222
+ value: null
223
+ suppress_tokens:
224
+ desc: null
225
+ value: null
226
+ begin_suppress_tokens:
227
+ desc: null
228
+ value: null
229
+ architectures:
230
+ desc: null
231
+ value:
232
+ - MistralForCausalLM
233
+ finetuning_task:
234
+ desc: null
235
+ value: null
236
+ id2label:
237
+ desc: null
238
+ value:
239
+ '0': LABEL_0
240
+ '1': LABEL_1
241
+ label2id:
242
+ desc: null
243
+ value:
244
+ LABEL_0: 0
245
+ LABEL_1: 1
246
+ tokenizer_class:
247
+ desc: null
248
+ value: null
249
+ prefix:
250
+ desc: null
251
+ value: null
252
+ bos_token_id:
253
+ desc: null
254
+ value: 1
255
+ pad_token_id:
256
+ desc: null
257
+ value: null
258
+ eos_token_id:
259
+ desc: null
260
+ value: 2
261
+ sep_token_id:
262
+ desc: null
263
+ value: null
264
+ decoder_start_token_id:
265
+ desc: null
266
+ value: null
267
+ task_specific_params:
268
+ desc: null
269
+ value: null
270
+ problem_type:
271
+ desc: null
272
+ value: null
273
+ _name_or_path:
274
+ desc: null
275
+ value: sanchit-gandhi/Mistral-7B-v0.1-6-layer
276
+ transformers_version:
277
+ desc: null
278
+ value: 4.36.2
279
+ model_type:
280
+ desc: null
281
+ value: mistral
282
+ output_dir:
283
+ desc: null
284
+ value: ./
285
+ overwrite_output_dir:
286
+ desc: null
287
+ value: true
288
+ do_train:
289
+ desc: null
290
+ value: false
291
+ do_eval:
292
+ desc: null
293
+ value: true
294
+ do_predict:
295
+ desc: null
296
+ value: false
297
+ evaluation_strategy:
298
+ desc: null
299
+ value: epoch
300
+ prediction_loss_only:
301
+ desc: null
302
+ value: false
303
+ per_device_train_batch_size:
304
+ desc: null
305
+ value: 16
306
+ per_device_eval_batch_size:
307
+ desc: null
308
+ value: 8
309
+ per_gpu_train_batch_size:
310
+ desc: null
311
+ value: null
312
+ per_gpu_eval_batch_size:
313
+ desc: null
314
+ value: null
315
+ gradient_accumulation_steps:
316
+ desc: null
317
+ value: 1
318
+ eval_accumulation_steps:
319
+ desc: null
320
+ value: null
321
+ eval_delay:
322
+ desc: null
323
+ value: 0
324
+ learning_rate:
325
+ desc: null
326
+ value: 2.0e-05
327
+ weight_decay:
328
+ desc: null
329
+ value: 0.0
330
+ adam_beta1:
331
+ desc: null
332
+ value: 0.9
333
+ adam_beta2:
334
+ desc: null
335
+ value: 0.999
336
+ adam_epsilon:
337
+ desc: null
338
+ value: 1.0e-08
339
+ max_grad_norm:
340
+ desc: null
341
+ value: 1.0
342
+ num_train_epochs:
343
+ desc: null
344
+ value: 1
345
+ max_steps:
346
+ desc: null
347
+ value: -1
348
+ lr_scheduler_type:
349
+ desc: null
350
+ value: cosine
351
+ lr_scheduler_kwargs:
352
+ desc: null
353
+ value: {}
354
+ warmup_ratio:
355
+ desc: null
356
+ value: 0.1
357
+ warmup_steps:
358
+ desc: null
359
+ value: 0
360
+ log_level:
361
+ desc: null
362
+ value: info
363
+ log_level_replica:
364
+ desc: null
365
+ value: warning
366
+ log_on_each_node:
367
+ desc: null
368
+ value: true
369
+ logging_dir:
370
+ desc: null
371
+ value: ./runs/Feb01_17-38-02_ip-26-0-165-24
372
+ logging_strategy:
373
+ desc: null
374
+ value: steps
375
+ logging_first_step:
376
+ desc: null
377
+ value: true
378
+ logging_steps:
379
+ desc: null
380
+ value: 5
381
+ logging_nan_inf_filter:
382
+ desc: null
383
+ value: true
384
+ save_strategy:
385
+ desc: null
386
+ value: steps
387
+ save_steps:
388
+ desc: null
389
+ value: 100
390
+ save_total_limit:
391
+ desc: null
392
+ value: 1
393
+ save_safetensors:
394
+ desc: null
395
+ value: true
396
+ save_on_each_node:
397
+ desc: null
398
+ value: false
399
+ save_only_model:
400
+ desc: null
401
+ value: false
402
+ no_cuda:
403
+ desc: null
404
+ value: false
405
+ use_cpu:
406
+ desc: null
407
+ value: false
408
+ use_mps_device:
409
+ desc: null
410
+ value: false
411
+ seed:
412
+ desc: null
413
+ value: 42
414
+ data_seed:
415
+ desc: null
416
+ value: null
417
+ jit_mode_eval:
418
+ desc: null
419
+ value: false
420
+ use_ipex:
421
+ desc: null
422
+ value: false
423
+ bf16:
424
+ desc: null
425
+ value: true
426
+ fp16:
427
+ desc: null
428
+ value: false
429
+ fp16_opt_level:
430
+ desc: null
431
+ value: O1
432
+ half_precision_backend:
433
+ desc: null
434
+ value: auto
435
+ bf16_full_eval:
436
+ desc: null
437
+ value: false
438
+ fp16_full_eval:
439
+ desc: null
440
+ value: false
441
+ tf32:
442
+ desc: null
443
+ value: null
444
+ local_rank:
445
+ desc: null
446
+ value: 0
447
+ ddp_backend:
448
+ desc: null
449
+ value: null
450
+ tpu_num_cores:
451
+ desc: null
452
+ value: null
453
+ tpu_metrics_debug:
454
+ desc: null
455
+ value: false
456
+ debug:
457
+ desc: null
458
+ value: []
459
+ dataloader_drop_last:
460
+ desc: null
461
+ value: false
462
+ eval_steps:
463
+ desc: null
464
+ value: null
465
+ dataloader_num_workers:
466
+ desc: null
467
+ value: 0
468
+ past_index:
469
+ desc: null
470
+ value: -1
471
+ run_name:
472
+ desc: null
473
+ value: ./
474
+ disable_tqdm:
475
+ desc: null
476
+ value: false
477
+ remove_unused_columns:
478
+ desc: null
479
+ value: true
480
+ label_names:
481
+ desc: null
482
+ value: null
483
+ load_best_model_at_end:
484
+ desc: null
485
+ value: false
486
+ metric_for_best_model:
487
+ desc: null
488
+ value: null
489
+ greater_is_better:
490
+ desc: null
491
+ value: null
492
+ ignore_data_skip:
493
+ desc: null
494
+ value: false
495
+ fsdp:
496
+ desc: null
497
+ value: []
498
+ fsdp_min_num_params:
499
+ desc: null
500
+ value: 0
501
+ fsdp_config:
502
+ desc: null
503
+ value:
504
+ min_num_params: 0
505
+ xla: false
506
+ xla_fsdp_grad_ckpt: false
507
+ fsdp_transformer_layer_cls_to_wrap:
508
+ desc: null
509
+ value: null
510
+ deepspeed:
511
+ desc: null
512
+ value: null
513
+ label_smoothing_factor:
514
+ desc: null
515
+ value: 0.0
516
+ optim:
517
+ desc: null
518
+ value: adamw_torch
519
+ optim_args:
520
+ desc: null
521
+ value: null
522
+ adafactor:
523
+ desc: null
524
+ value: false
525
+ group_by_length:
526
+ desc: null
527
+ value: false
528
+ length_column_name:
529
+ desc: null
530
+ value: length
531
+ report_to:
532
+ desc: null
533
+ value:
534
+ - tensorboard
535
+ - wandb
536
+ ddp_find_unused_parameters:
537
+ desc: null
538
+ value: null
539
+ ddp_bucket_cap_mb:
540
+ desc: null
541
+ value: null
542
+ ddp_broadcast_buffers:
543
+ desc: null
544
+ value: null
545
+ dataloader_pin_memory:
546
+ desc: null
547
+ value: true
548
+ dataloader_persistent_workers:
549
+ desc: null
550
+ value: false
551
+ skip_memory_metrics:
552
+ desc: null
553
+ value: true
554
+ use_legacy_prediction_loop:
555
+ desc: null
556
+ value: false
557
+ push_to_hub:
558
+ desc: null
559
+ value: true
560
+ resume_from_checkpoint:
561
+ desc: null
562
+ value: null
563
+ hub_model_id:
564
+ desc: null
565
+ value: null
566
+ hub_strategy:
567
+ desc: null
568
+ value: every_save
569
+ hub_token:
570
+ desc: null
571
+ value: <HUB_TOKEN>
572
+ hub_private_repo:
573
+ desc: null
574
+ value: false
575
+ hub_always_push:
576
+ desc: null
577
+ value: false
578
+ gradient_checkpointing:
579
+ desc: null
580
+ value: true
581
+ gradient_checkpointing_kwargs:
582
+ desc: null
583
+ value:
584
+ use_reentrant: false
585
+ include_inputs_for_metrics:
586
+ desc: null
587
+ value: false
588
+ fp16_backend:
589
+ desc: null
590
+ value: auto
591
+ push_to_hub_model_id:
592
+ desc: null
593
+ value: null
594
+ push_to_hub_organization:
595
+ desc: null
596
+ value: null
597
+ push_to_hub_token:
598
+ desc: null
599
+ value: <PUSH_TO_HUB_TOKEN>
600
+ mp_parameters:
601
+ desc: null
602
+ value: ''
603
+ auto_find_batch_size:
604
+ desc: null
605
+ value: false
606
+ full_determinism:
607
+ desc: null
608
+ value: false
609
+ torchdynamo:
610
+ desc: null
611
+ value: null
612
+ ray_scope:
613
+ desc: null
614
+ value: last
615
+ ddp_timeout:
616
+ desc: null
617
+ value: 1800
618
+ torch_compile:
619
+ desc: null
620
+ value: false
621
+ torch_compile_backend:
622
+ desc: null
623
+ value: null
624
+ torch_compile_mode:
625
+ desc: null
626
+ value: null
627
+ dispatch_batches:
628
+ desc: null
629
+ value: null
630
+ split_batches:
631
+ desc: null
632
+ value: false
633
+ include_tokens_per_second:
634
+ desc: null
635
+ value: false
636
+ include_num_input_tokens_seen:
637
+ desc: null
638
+ value: false
639
+ neftune_noise_alpha:
640
+ desc: null
641
+ value: null
642
+ max_seq_length:
643
+ desc: null
644
+ value: 2048
wandb/run-20240201_173828-py26nu6m/files/output.log ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ 0%| | 0/1090 [00:00<?, ?it/s][WARNING|logging.py:314] 2024-02-01 17:38:34,491 >> You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
3
+ {'loss': 13.9185, 'learning_rate': 1.8348623853211012e-07, 'epoch': 0.0}
4
+ 0%|▏ | 3/1090 [00:09<43:27, 2.40s/it]
5
+
6
+
7
+
8
+ 1%|▋ | 9/1090 [00:14<18:27, 1.02s/it]
9
+
10
+
11
+ 1%|█ | 14/1090 [00:19<16:33, 1.08it/s]
12
+
13
+
14
+ 2%|█▎ | 18/1090 [00:22<16:14, 1.10it/s]
15
+
16
+
17
+ 2%|█▋ | 23/1090 [00:27<16:05, 1.11it/s]
18
+
19
+
20
+
21
+ 3%|██▏ | 29/1090 [00:32<16:02, 1.10it/s]
22
+
23
+
24
+ 3%|██▌ | 34/1090 [00:37<15:58, 1.10it/s]
25
+
26
+
27
+ 3%|██▊ | 38/1090 [00:41<15:53, 1.10it/s]
28
+
29
+
30
+ 4%|███ | 42/1090 [00:44<15:50, 1.10it/s]
31
+
32
+
33
+
34
+ 4%|███▋ | 49/1090 [00:51<15:46, 1.10it/s]
35
+
36
+
37
+ 5%|████ | 54/1090 [00:55<15:42, 1.10it/s]
38
+
39
+
40
+ 5%|████▎ | 58/1090 [00:59<15:38, 1.10it/s]
41
+
42
+
43
+
44
+ 6%|████▊ | 64/1090 [01:04<15:34, 1.10it/s]
45
+
46
+
47
+ 6%|█████▏ | 69/1090 [01:09<15:30, 1.10it/s]
48
+
49
+
50
+ 7%|█████▍ | 73/1090 [01:12<15:26, 1.10it/s]
51
+
52
+
53
+
54
+ 7%|█████▉ | 80/1090 [01:19<15:18, 1.10it/s]
55
+
56
+
57
+ 8%|██████▏ | 84/1090 [01:22<15:16, 1.10it/s]
58
+
59
+
60
+ 8%|██████▌ | 89/1090 [01:27<15:09, 1.10it/s]
61
+
62
+
63
+ 9%|██████▉ | 93/1090 [01:31<15:08, 1.10it/s]
64
+
65
+
66
+
67
+ 9%|███████▎ | 100/1090 [01:37<14:59, 1.10it/s]
68
+ 9%|███████▎ | 100/1090 [01:37<14:59, 1.10it/s][INFO|trainer.py:2889] 2024-02-01 17:40:13,014 >> Saving model checkpoint to ./tmp-checkpoint-100
69
+ [INFO|configuration_utils.py:483] 2024-02-01 17:40:13,018 >> Configuration saved in ./tmp-checkpoint-100/config.json
70
+ [INFO|configuration_utils.py:594] 2024-02-01 17:40:13,020 >> Configuration saved in ./tmp-checkpoint-100/generation_config.json
71
+ [INFO|modeling_utils.py:2382] 2024-02-01 17:40:16,055 >> Model weights saved in ./tmp-checkpoint-100/pytorch_model.bin
72
+ [INFO|tokenization_utils_base.py:2432] 2024-02-01 17:40:16,059 >> tokenizer config file saved in ./tmp-checkpoint-100/tokenizer_config.json
73
+ [INFO|tokenization_utils_base.py:2441] 2024-02-01 17:40:16,061 >> Special tokens file saved in ./tmp-checkpoint-100/special_tokens_map.json
74
+ /fsx/sanchit/miniconda3/envs/venv/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.
75
+ warnings.warn(
76
+ [2024-02-01 17:40:16,087] [INFO] [logging.py:96:log_dist] [Rank 0] [Torch] Checkpoint global_step100 is about to be saved!
77
+ [2024-02-01 17:40:16,093] [INFO] [logging.py:96:log_dist] [Rank 0] Saving model checkpoint: ./tmp-checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_model_states.pt
78
+ [2024-02-01 17:40:16,093] [INFO] [torch_checkpoint_engine.py:21:save] [Torch] Saving ./tmp-checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_model_states.pt...
79
+ [2024-02-01 17:40:16,210] [INFO] [torch_checkpoint_engine.py:23:save] [Torch] Saved ./tmp-checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_model_states.pt.
80
+ [2024-02-01 17:40:16,214] [INFO] [torch_checkpoint_engine.py:21:save] [Torch] Saving ./tmp-checkpoint-100/global_step100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt...
81
+ [2024-02-01 17:40:19,957] [INFO] [torch_checkpoint_engine.py:23:save] [Torch] Saved ./tmp-checkpoint-100/global_step100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt.
82
+ [2024-02-01 17:40:19,962] [INFO] [engine.py:3393:_save_zero_checkpoint] zero checkpoint saved ./tmp-checkpoint-100/global_step100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
83
+ [2024-02-01 17:40:20,277] [INFO] [torch_checkpoint_engine.py:33:commit] [Torch] Checkpoint global_step100 is ready now!
84
+ [INFO|tokenization_utils_base.py:2432] 2024-02-01 17:40:22,999 >> tokenizer config file saved in ./tokenizer_config.json
85
+ [INFO|tokenization_utils_base.py:2441] 2024-02-01 17:40:23,001 >> Special tokens file saved in ./special_tokens_map.json
86
+
87
+ 9%|███████▌ | 103/1090 [01:51<41:42, 2.54s/it]
88
+
89
+
90
+
91
+ 10%|████████ | 110/1090 [01:57<17:02, 1.04s/it]
92
+
93
+
94
+ 10%|████████▎ | 114/1090 [02:01<15:18, 1.06it/s]
95
+
96
+
97
+ 11%|████████▋ | 118/1090 [02:04<14:52, 1.09it/s]
98
+
99
+
100
+
101
+ 11%|█████████▏ | 125/1090 [02:11<14:39, 1.10it/s]
102
+
103
+
104
+ 12%|█████████▍ | 129/1090 [02:14<14:34, 1.10it/s]
105
+
106
+
107
+ 12%|█████████▊ | 134/1090 [02:19<14:31, 1.10it/s]
108
+
109
+
110
+ 13%|██████████▏ | 138/1090 [02:23<14:26, 1.10it/s]
111
+
112
+
113
+
114
+ 13%|██████████▋ | 145/1090 [02:29<14:19, 1.10it/s]
115
+
116
+
117
+ 14%|██████████▉ | 149/1090 [02:33<14:17, 1.10it/s]
118
+
119
+
120
+ 14%|███████████▏ | 153/1090 [02:36<14:14, 1.10it/s]
121
+
122
+
123
+
124
+ 15%|███████████▋ | 160/1090 [02:43<14:08, 1.10it/s]
125
+
126
+
127
+ 15%|████████████ | 164/1090 [02:46<14:07, 1.09it/s]
128
+
129
+
130
+ 16%|████████████▍ | 169/1090 [02:51<14:01, 1.09it/s]
131
+
wandb/run-20240201_173828-py26nu6m/files/requirements.txt ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==2.0.0
2
+ accelerate==0.23.0
3
+ aiohttp==3.9.1
4
+ aiosignal==1.3.1
5
+ alignment-handbook==0.4.0.dev0
6
+ annotated-types==0.6.0
7
+ appdirs==1.4.4
8
+ astunparse==1.6.3
9
+ attrs==23.1.0
10
+ audioread==3.0.1
11
+ bitsandbytes==0.41.2.post2
12
+ brotli==1.0.9
13
+ cachetools==5.3.2
14
+ certifi==2023.11.17
15
+ cffi==1.16.0
16
+ chardet==5.2.0
17
+ charset-normalizer==2.0.4
18
+ click==8.1.7
19
+ cryptography==41.0.7
20
+ datasets==2.14.6
21
+ decorator==5.1.1
22
+ deepspeed==0.12.2
23
+ dill==0.3.7
24
+ docker-pycreds==0.4.0
25
+ docstring-parser==0.15
26
+ einops==0.7.0
27
+ evaluate==0.4.0
28
+ filelock==3.13.1
29
+ flash-attn==2.5.2
30
+ flatbuffers==23.5.26
31
+ frozenlist==1.4.1
32
+ fsspec==2023.10.0
33
+ gast==0.5.4
34
+ gitdb==4.0.11
35
+ gitpython==3.1.40
36
+ gmpy2==2.1.2
37
+ google-auth-oauthlib==1.2.0
38
+ google-auth==2.26.1
39
+ google-pasta==0.2.0
40
+ grpcio==1.60.0
41
+ h5py==3.10.0
42
+ hf-transfer==0.1.5
43
+ hjson==3.1.0
44
+ huggingface-hub==0.20.1
45
+ idna==3.4
46
+ jinja2==3.1.2
47
+ jiwer==3.0.3
48
+ joblib==1.3.2
49
+ keras==2.15.0
50
+ lazy-loader==0.3
51
+ libclang==16.0.6
52
+ librosa==0.10.1
53
+ llvmlite==0.41.1
54
+ markdown-it-py==3.0.0
55
+ markdown==3.5.1
56
+ markupsafe==2.1.1
57
+ mdurl==0.1.2
58
+ mkl-fft==1.3.8
59
+ mkl-random==1.2.4
60
+ mkl-service==2.4.0
61
+ ml-dtypes==0.2.0
62
+ mpmath==1.3.0
63
+ msgpack==1.0.7
64
+ multidict==6.0.4
65
+ multiprocess==0.70.15
66
+ networkx==3.1
67
+ ninja==1.11.1.1
68
+ nltk==3.8.1
69
+ numba==0.58.1
70
+ numpy==1.26.2
71
+ oauthlib==3.2.2
72
+ opt-einsum==3.3.0
73
+ packaging==23.2
74
+ pandas==2.1.4
75
+ peft==0.7.1
76
+ pillow==10.2.0
77
+ pip==23.3.2
78
+ platformdirs==4.1.0
79
+ pooch==1.8.0
80
+ protobuf==3.20.2
81
+ psutil==5.9.7
82
+ py-cpuinfo==9.0.0
83
+ pyarrow-hotfix==0.6
84
+ pyarrow==14.0.2
85
+ pyasn1-modules==0.3.0
86
+ pyasn1==0.5.1
87
+ pycparser==2.21
88
+ pydantic-core==2.16.1
89
+ pydantic==2.6.0
90
+ pygments==2.17.2
91
+ pynvml==11.5.0
92
+ pyopenssl==23.2.0
93
+ pysocks==1.7.1
94
+ python-dateutil==2.8.2
95
+ pytz==2023.3.post1
96
+ pyyaml==6.0.1
97
+ rapidfuzz==3.6.1
98
+ regex==2023.12.25
99
+ requests-oauthlib==1.3.1
100
+ requests==2.31.0
101
+ responses==0.18.0
102
+ rich==13.7.0
103
+ rsa==4.9
104
+ safetensors==0.4.1
105
+ scikit-learn==1.3.2
106
+ scipy==1.11.4
107
+ sentencepiece==0.1.99
108
+ sentry-sdk==1.39.1
109
+ setproctitle==1.3.3
110
+ setuptools==68.2.2
111
+ shtab==1.6.5
112
+ six==1.16.0
113
+ smmap==5.0.1
114
+ soundfile==0.12.1
115
+ soxr==0.3.7
116
+ sympy==1.12
117
+ tensorboard-data-server==0.7.2
118
+ tensorboard==2.15.1
119
+ tensorflow-cpu==2.15.0.post1
120
+ tensorflow-estimator==2.15.0
121
+ tensorflow-io-gcs-filesystem==0.35.0
122
+ termcolor==2.4.0
123
+ threadpoolctl==3.2.0
124
+ tokenizers==0.15.0
125
+ torch==2.1.2
126
+ torchaudio==2.1.2
127
+ torchvision==0.16.2
128
+ tqdm==4.66.1
129
+ transformers==4.36.2
130
+ triton==2.1.0
131
+ trl==0.7.7
132
+ typing-extensions==4.7.1
133
+ tyro==0.7.0
134
+ tzdata==2023.3
135
+ urllib3==1.26.18
136
+ wandb==0.16.1
137
+ werkzeug==3.0.1
138
+ wheel==0.41.2
139
+ wrapt==1.14.1
140
+ xxhash==3.4.1
141
+ yarl==1.9.4
wandb/run-20240201_173828-py26nu6m/files/wandb-metadata.json ADDED
@@ -0,0 +1,558 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-1048-aws-x86_64-with-glibc2.31",
3
+ "python": "3.11.5",
4
+ "heartbeatAt": "2024-02-01T17:38:28.880477",
5
+ "startedAt": "2024-02-01T17:38:28.419493",
6
+ "docker": null,
7
+ "cuda": null,
8
+ "args": [
9
+ "config_full.yaml"
10
+ ],
11
+ "state": "running",
12
+ "program": "/fsx/sanchit/distil-zephyr-1.5b-ssft/run_sft.py",
13
+ "codePathLocal": "run_sft.py",
14
+ "codePath": "run_sft.py",
15
+ "git": {
16
+ "remote": "https://huggingface.co/sanchit-gandhi/distil-zephyr-1.5b-ssft",
17
+ "commit": "79a4ae874a71e67016ded927e7d23351e5c7dab8"
18
+ },
19
+ "email": null,
20
+ "root": "/fsx/sanchit/distil-zephyr-1.5b-ssft",
21
+ "host": "ip-26-0-165-24",
22
+ "username": "sanchit",
23
+ "executable": "/fsx/sanchit/miniconda3/envs/venv/bin/python",
24
+ "cpu_count": 96,
25
+ "cpu_count_logical": 96,
26
+ "cpu_freq": {
27
+ "current": 2731.1230833333334,
28
+ "min": 0.0,
29
+ "max": 0.0
30
+ },
31
+ "cpu_freq_per_core": [
32
+ {
33
+ "current": 2650.0,
34
+ "min": 0.0,
35
+ "max": 0.0
36
+ },
37
+ {
38
+ "current": 3598.237,
39
+ "min": 0.0,
40
+ "max": 0.0
41
+ },
42
+ {
43
+ "current": 2650.0,
44
+ "min": 0.0,
45
+ "max": 0.0
46
+ },
47
+ {
48
+ "current": 2650.0,
49
+ "min": 0.0,
50
+ "max": 0.0
51
+ },
52
+ {
53
+ "current": 2650.0,
54
+ "min": 0.0,
55
+ "max": 0.0
56
+ },
57
+ {
58
+ "current": 3593.987,
59
+ "min": 0.0,
60
+ "max": 0.0
61
+ },
62
+ {
63
+ "current": 2650.0,
64
+ "min": 0.0,
65
+ "max": 0.0
66
+ },
67
+ {
68
+ "current": 2650.0,
69
+ "min": 0.0,
70
+ "max": 0.0
71
+ },
72
+ {
73
+ "current": 2650.0,
74
+ "min": 0.0,
75
+ "max": 0.0
76
+ },
77
+ {
78
+ "current": 2650.0,
79
+ "min": 0.0,
80
+ "max": 0.0
81
+ },
82
+ {
83
+ "current": 2650.0,
84
+ "min": 0.0,
85
+ "max": 0.0
86
+ },
87
+ {
88
+ "current": 2650.0,
89
+ "min": 0.0,
90
+ "max": 0.0
91
+ },
92
+ {
93
+ "current": 3597.474,
94
+ "min": 0.0,
95
+ "max": 0.0
96
+ },
97
+ {
98
+ "current": 2650.0,
99
+ "min": 0.0,
100
+ "max": 0.0
101
+ },
102
+ {
103
+ "current": 2650.0,
104
+ "min": 0.0,
105
+ "max": 0.0
106
+ },
107
+ {
108
+ "current": 3587.972,
109
+ "min": 0.0,
110
+ "max": 0.0
111
+ },
112
+ {
113
+ "current": 3597.373,
114
+ "min": 0.0,
115
+ "max": 0.0
116
+ },
117
+ {
118
+ "current": 2650.0,
119
+ "min": 0.0,
120
+ "max": 0.0
121
+ },
122
+ {
123
+ "current": 2650.0,
124
+ "min": 0.0,
125
+ "max": 0.0
126
+ },
127
+ {
128
+ "current": 2650.0,
129
+ "min": 0.0,
130
+ "max": 0.0
131
+ },
132
+ {
133
+ "current": 2899.882,
134
+ "min": 0.0,
135
+ "max": 0.0
136
+ },
137
+ {
138
+ "current": 2650.0,
139
+ "min": 0.0,
140
+ "max": 0.0
141
+ },
142
+ {
143
+ "current": 2650.0,
144
+ "min": 0.0,
145
+ "max": 0.0
146
+ },
147
+ {
148
+ "current": 2650.0,
149
+ "min": 0.0,
150
+ "max": 0.0
151
+ },
152
+ {
153
+ "current": 3598.404,
154
+ "min": 0.0,
155
+ "max": 0.0
156
+ },
157
+ {
158
+ "current": 2650.0,
159
+ "min": 0.0,
160
+ "max": 0.0
161
+ },
162
+ {
163
+ "current": 3597.582,
164
+ "min": 0.0,
165
+ "max": 0.0
166
+ },
167
+ {
168
+ "current": 2650.0,
169
+ "min": 0.0,
170
+ "max": 0.0
171
+ },
172
+ {
173
+ "current": 2650.0,
174
+ "min": 0.0,
175
+ "max": 0.0
176
+ },
177
+ {
178
+ "current": 2650.0,
179
+ "min": 0.0,
180
+ "max": 0.0
181
+ },
182
+ {
183
+ "current": 2650.0,
184
+ "min": 0.0,
185
+ "max": 0.0
186
+ },
187
+ {
188
+ "current": 2650.0,
189
+ "min": 0.0,
190
+ "max": 0.0
191
+ },
192
+ {
193
+ "current": 2650.0,
194
+ "min": 0.0,
195
+ "max": 0.0
196
+ },
197
+ {
198
+ "current": 2650.0,
199
+ "min": 0.0,
200
+ "max": 0.0
201
+ },
202
+ {
203
+ "current": 2650.0,
204
+ "min": 0.0,
205
+ "max": 0.0
206
+ },
207
+ {
208
+ "current": 2650.0,
209
+ "min": 0.0,
210
+ "max": 0.0
211
+ },
212
+ {
213
+ "current": 2650.0,
214
+ "min": 0.0,
215
+ "max": 0.0
216
+ },
217
+ {
218
+ "current": 2650.0,
219
+ "min": 0.0,
220
+ "max": 0.0
221
+ },
222
+ {
223
+ "current": 2650.0,
224
+ "min": 0.0,
225
+ "max": 0.0
226
+ },
227
+ {
228
+ "current": 2650.0,
229
+ "min": 0.0,
230
+ "max": 0.0
231
+ },
232
+ {
233
+ "current": 2650.0,
234
+ "min": 0.0,
235
+ "max": 0.0
236
+ },
237
+ {
238
+ "current": 2650.0,
239
+ "min": 0.0,
240
+ "max": 0.0
241
+ },
242
+ {
243
+ "current": 2650.0,
244
+ "min": 0.0,
245
+ "max": 0.0
246
+ },
247
+ {
248
+ "current": 2650.0,
249
+ "min": 0.0,
250
+ "max": 0.0
251
+ },
252
+ {
253
+ "current": 2650.0,
254
+ "min": 0.0,
255
+ "max": 0.0
256
+ },
257
+ {
258
+ "current": 2650.0,
259
+ "min": 0.0,
260
+ "max": 0.0
261
+ },
262
+ {
263
+ "current": 2650.0,
264
+ "min": 0.0,
265
+ "max": 0.0
266
+ },
267
+ {
268
+ "current": 2650.0,
269
+ "min": 0.0,
270
+ "max": 0.0
271
+ },
272
+ {
273
+ "current": 2650.0,
274
+ "min": 0.0,
275
+ "max": 0.0
276
+ },
277
+ {
278
+ "current": 2650.0,
279
+ "min": 0.0,
280
+ "max": 0.0
281
+ },
282
+ {
283
+ "current": 2650.0,
284
+ "min": 0.0,
285
+ "max": 0.0
286
+ },
287
+ {
288
+ "current": 2650.0,
289
+ "min": 0.0,
290
+ "max": 0.0
291
+ },
292
+ {
293
+ "current": 2650.0,
294
+ "min": 0.0,
295
+ "max": 0.0
296
+ },
297
+ {
298
+ "current": 2650.0,
299
+ "min": 0.0,
300
+ "max": 0.0
301
+ },
302
+ {
303
+ "current": 2650.0,
304
+ "min": 0.0,
305
+ "max": 0.0
306
+ },
307
+ {
308
+ "current": 2650.0,
309
+ "min": 0.0,
310
+ "max": 0.0
311
+ },
312
+ {
313
+ "current": 2650.0,
314
+ "min": 0.0,
315
+ "max": 0.0
316
+ },
317
+ {
318
+ "current": 2650.0,
319
+ "min": 0.0,
320
+ "max": 0.0
321
+ },
322
+ {
323
+ "current": 2650.0,
324
+ "min": 0.0,
325
+ "max": 0.0
326
+ },
327
+ {
328
+ "current": 2650.0,
329
+ "min": 0.0,
330
+ "max": 0.0
331
+ },
332
+ {
333
+ "current": 2650.0,
334
+ "min": 0.0,
335
+ "max": 0.0
336
+ },
337
+ {
338
+ "current": 2650.0,
339
+ "min": 0.0,
340
+ "max": 0.0
341
+ },
342
+ {
343
+ "current": 2650.0,
344
+ "min": 0.0,
345
+ "max": 0.0
346
+ },
347
+ {
348
+ "current": 2650.0,
349
+ "min": 0.0,
350
+ "max": 0.0
351
+ },
352
+ {
353
+ "current": 2650.0,
354
+ "min": 0.0,
355
+ "max": 0.0
356
+ },
357
+ {
358
+ "current": 2650.0,
359
+ "min": 0.0,
360
+ "max": 0.0
361
+ },
362
+ {
363
+ "current": 2650.0,
364
+ "min": 0.0,
365
+ "max": 0.0
366
+ },
367
+ {
368
+ "current": 2650.0,
369
+ "min": 0.0,
370
+ "max": 0.0
371
+ },
372
+ {
373
+ "current": 2650.0,
374
+ "min": 0.0,
375
+ "max": 0.0
376
+ },
377
+ {
378
+ "current": 2650.0,
379
+ "min": 0.0,
380
+ "max": 0.0
381
+ },
382
+ {
383
+ "current": 2650.0,
384
+ "min": 0.0,
385
+ "max": 0.0
386
+ },
387
+ {
388
+ "current": 2650.0,
389
+ "min": 0.0,
390
+ "max": 0.0
391
+ },
392
+ {
393
+ "current": 2650.0,
394
+ "min": 0.0,
395
+ "max": 0.0
396
+ },
397
+ {
398
+ "current": 2650.0,
399
+ "min": 0.0,
400
+ "max": 0.0
401
+ },
402
+ {
403
+ "current": 2650.0,
404
+ "min": 0.0,
405
+ "max": 0.0
406
+ },
407
+ {
408
+ "current": 2650.0,
409
+ "min": 0.0,
410
+ "max": 0.0
411
+ },
412
+ {
413
+ "current": 2650.0,
414
+ "min": 0.0,
415
+ "max": 0.0
416
+ },
417
+ {
418
+ "current": 2650.0,
419
+ "min": 0.0,
420
+ "max": 0.0
421
+ },
422
+ {
423
+ "current": 2650.0,
424
+ "min": 0.0,
425
+ "max": 0.0
426
+ },
427
+ {
428
+ "current": 2650.0,
429
+ "min": 0.0,
430
+ "max": 0.0
431
+ },
432
+ {
433
+ "current": 2650.0,
434
+ "min": 0.0,
435
+ "max": 0.0
436
+ },
437
+ {
438
+ "current": 2650.0,
439
+ "min": 0.0,
440
+ "max": 0.0
441
+ },
442
+ {
443
+ "current": 2650.0,
444
+ "min": 0.0,
445
+ "max": 0.0
446
+ },
447
+ {
448
+ "current": 2650.0,
449
+ "min": 0.0,
450
+ "max": 0.0
451
+ },
452
+ {
453
+ "current": 2650.0,
454
+ "min": 0.0,
455
+ "max": 0.0
456
+ },
457
+ {
458
+ "current": 2650.0,
459
+ "min": 0.0,
460
+ "max": 0.0
461
+ },
462
+ {
463
+ "current": 2650.0,
464
+ "min": 0.0,
465
+ "max": 0.0
466
+ },
467
+ {
468
+ "current": 2650.0,
469
+ "min": 0.0,
470
+ "max": 0.0
471
+ },
472
+ {
473
+ "current": 2650.0,
474
+ "min": 0.0,
475
+ "max": 0.0
476
+ },
477
+ {
478
+ "current": 2650.0,
479
+ "min": 0.0,
480
+ "max": 0.0
481
+ },
482
+ {
483
+ "current": 2650.0,
484
+ "min": 0.0,
485
+ "max": 0.0
486
+ },
487
+ {
488
+ "current": 2650.0,
489
+ "min": 0.0,
490
+ "max": 0.0
491
+ },
492
+ {
493
+ "current": 2650.0,
494
+ "min": 0.0,
495
+ "max": 0.0
496
+ },
497
+ {
498
+ "current": 2650.0,
499
+ "min": 0.0,
500
+ "max": 0.0
501
+ },
502
+ {
503
+ "current": 2650.0,
504
+ "min": 0.0,
505
+ "max": 0.0
506
+ },
507
+ {
508
+ "current": 2650.0,
509
+ "min": 0.0,
510
+ "max": 0.0
511
+ }
512
+ ],
513
+ "disk": {
514
+ "/": {
515
+ "total": 290.7472343444824,
516
+ "used": 57.44935989379883
517
+ }
518
+ },
519
+ "gpu": "NVIDIA H100 80GB HBM3",
520
+ "gpu_count": 8,
521
+ "gpu_devices": [
522
+ {
523
+ "name": "NVIDIA H100 80GB HBM3",
524
+ "memory_total": 85520809984
525
+ },
526
+ {
527
+ "name": "NVIDIA H100 80GB HBM3",
528
+ "memory_total": 85520809984
529
+ },
530
+ {
531
+ "name": "NVIDIA H100 80GB HBM3",
532
+ "memory_total": 85520809984
533
+ },
534
+ {
535
+ "name": "NVIDIA H100 80GB HBM3",
536
+ "memory_total": 85520809984
537
+ },
538
+ {
539
+ "name": "NVIDIA H100 80GB HBM3",
540
+ "memory_total": 85520809984
541
+ },
542
+ {
543
+ "name": "NVIDIA H100 80GB HBM3",
544
+ "memory_total": 85520809984
545
+ },
546
+ {
547
+ "name": "NVIDIA H100 80GB HBM3",
548
+ "memory_total": 85520809984
549
+ },
550
+ {
551
+ "name": "NVIDIA H100 80GB HBM3",
552
+ "memory_total": 85520809984
553
+ }
554
+ ],
555
+ "memory": {
556
+ "total": 1999.9855346679688
557
+ }
558
+ }
wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"train/loss": 6.8326, "train/learning_rate": 1.9809800282473014e-05, "train/epoch": 0.16, "train/global_step": 170, "_timestamp": 1706809286.8025768, "_runtime": 178.35070276260376, "_step": 34}
wandb/run-20240201_173828-py26nu6m/logs/debug-internal.log ADDED
@@ -0,0 +1,385 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-02-01 17:38:28,445 INFO StreamThr :237521 [internal.py:wandb_internal():86] W&B internal server running at pid: 237521, started at: 2024-02-01 17:38:28.443368
2
+ 2024-02-01 17:38:28,446 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status
3
+ 2024-02-01 17:38:28,454 INFO WriterThread:237521 [datastore.py:open_for_write():85] open: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/run-py26nu6m.wandb
4
+ 2024-02-01 17:38:28,455 DEBUG SenderThread:237521 [sender.py:send():382] send: header
5
+ 2024-02-01 17:38:28,476 DEBUG SenderThread:237521 [sender.py:send():382] send: run
6
+ 2024-02-01 17:38:28,713 INFO SenderThread:237521 [dir_watcher.py:__init__():211] watching files in: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files
7
+ 2024-02-01 17:38:28,713 INFO SenderThread:237521 [sender.py:_start_run_threads():1136] run started: py26nu6m with start time 1706809108.451874
8
+ 2024-02-01 17:38:28,720 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: check_version
9
+ 2024-02-01 17:38:28,720 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: check_version
10
+ 2024-02-01 17:38:28,774 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: run_start
11
+ 2024-02-01 17:38:28,833 DEBUG HandlerThread:237521 [system_info.py:__init__():32] System info init
12
+ 2024-02-01 17:38:28,833 DEBUG HandlerThread:237521 [system_info.py:__init__():47] System info init done
13
+ 2024-02-01 17:38:28,833 INFO HandlerThread:237521 [system_monitor.py:start():194] Starting system monitor
14
+ 2024-02-01 17:38:28,833 INFO SystemMonitor:237521 [system_monitor.py:_start():158] Starting system asset monitoring threads
15
+ 2024-02-01 17:38:28,833 INFO HandlerThread:237521 [system_monitor.py:probe():214] Collecting system info
16
+ 2024-02-01 17:38:28,834 INFO SystemMonitor:237521 [interfaces.py:start():190] Started cpu monitoring
17
+ 2024-02-01 17:38:28,834 INFO SystemMonitor:237521 [interfaces.py:start():190] Started disk monitoring
18
+ 2024-02-01 17:38:28,834 INFO SystemMonitor:237521 [interfaces.py:start():190] Started gpu monitoring
19
+ 2024-02-01 17:38:28,836 INFO SystemMonitor:237521 [interfaces.py:start():190] Started memory monitoring
20
+ 2024-02-01 17:38:28,836 INFO SystemMonitor:237521 [interfaces.py:start():190] Started network monitoring
21
+ 2024-02-01 17:38:28,880 DEBUG HandlerThread:237521 [system_info.py:probe():196] Probing system
22
+ 2024-02-01 17:38:28,883 DEBUG HandlerThread:237521 [system_info.py:_probe_git():181] Probing git
23
+ 2024-02-01 17:38:28,908 DEBUG HandlerThread:237521 [system_info.py:_probe_git():189] Probing git done
24
+ 2024-02-01 17:38:28,908 DEBUG HandlerThread:237521 [system_info.py:probe():244] Probing system done
25
+ 2024-02-01 17:38:28,908 DEBUG HandlerThread:237521 [system_monitor.py:probe():223] {'os': 'Linux-5.15.0-1048-aws-x86_64-with-glibc2.31', 'python': '3.11.5', 'heartbeatAt': '2024-02-01T17:38:28.880477', 'startedAt': '2024-02-01T17:38:28.419493', 'docker': None, 'cuda': None, 'args': ('config_full.yaml',), 'state': 'running', 'program': '/fsx/sanchit/distil-zephyr-1.5b-ssft/run_sft.py', 'codePathLocal': 'run_sft.py', 'codePath': 'run_sft.py', 'git': {'remote': 'https://huggingface.co/sanchit-gandhi/distil-zephyr-1.5b-ssft', 'commit': '79a4ae874a71e67016ded927e7d23351e5c7dab8'}, 'email': None, 'root': '/fsx/sanchit/distil-zephyr-1.5b-ssft', 'host': 'ip-26-0-165-24', 'username': 'sanchit', 'executable': '/fsx/sanchit/miniconda3/envs/venv/bin/python', 'cpu_count': 96, 'cpu_count_logical': 96, 'cpu_freq': {'current': 2731.1230833333334, 'min': 0.0, 'max': 0.0}, 'cpu_freq_per_core': [{'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 3598.237, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 3593.987, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 3597.474, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 3587.972, 'min': 0.0, 'max': 0.0}, {'current': 3597.373, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2899.882, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 
3598.404, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 3597.582, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 
0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}], 'disk': {'/': {'total': 290.7472343444824, 'used': 57.44935989379883}}, 'gpu': 'NVIDIA H100 80GB HBM3', 'gpu_count': 8, 'gpu_devices': [{'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}], 'memory': {'total': 1999.9855346679688}}
26
+ 2024-02-01 17:38:28,908 INFO HandlerThread:237521 [system_monitor.py:probe():224] Finished collecting system info
27
+ 2024-02-01 17:38:28,908 INFO HandlerThread:237521 [system_monitor.py:probe():227] Publishing system info
28
+ 2024-02-01 17:38:28,908 DEBUG HandlerThread:237521 [system_info.py:_save_pip():52] Saving list of pip packages installed into the current environment
29
+ 2024-02-01 17:38:28,910 DEBUG HandlerThread:237521 [system_info.py:_save_pip():68] Saving pip packages done
30
+ 2024-02-01 17:38:28,910 DEBUG HandlerThread:237521 [system_info.py:_save_conda():75] Saving list of conda packages installed into the current environment
31
+ 2024-02-01 17:38:29,716 INFO Thread-12 :237521 [dir_watcher.py:_on_file_created():271] file/dir created: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/conda-environment.yaml
32
+ 2024-02-01 17:38:29,716 INFO Thread-12 :237521 [dir_watcher.py:_on_file_created():271] file/dir created: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/requirements.txt
33
+ 2024-02-01 17:38:34,420 DEBUG HandlerThread:237521 [system_info.py:_save_conda():87] Saving conda packages done
34
+ 2024-02-01 17:38:34,423 INFO HandlerThread:237521 [system_monitor.py:probe():229] Finished publishing system info
35
+ 2024-02-01 17:38:34,457 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
36
+ 2024-02-01 17:38:34,457 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: keepalive
37
+ 2024-02-01 17:38:34,458 DEBUG SenderThread:237521 [sender.py:send():382] send: files
38
+ 2024-02-01 17:38:34,458 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-metadata.json with policy now
39
+ 2024-02-01 17:38:34,465 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
40
+ 2024-02-01 17:38:34,465 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
41
+ 2024-02-01 17:38:34,466 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
42
+ 2024-02-01 17:38:34,553 DEBUG SenderThread:237521 [sender.py:send():382] send: telemetry
43
+ 2024-02-01 17:38:34,554 DEBUG SenderThread:237521 [sender.py:send():382] send: config
44
+ 2024-02-01 17:38:34,555 DEBUG SenderThread:237521 [sender.py:send():382] send: metric
45
+ 2024-02-01 17:38:34,555 DEBUG SenderThread:237521 [sender.py:send():382] send: telemetry
46
+ 2024-02-01 17:38:34,555 DEBUG SenderThread:237521 [sender.py:send():382] send: metric
47
+ 2024-02-01 17:38:34,555 WARNING SenderThread:237521 [sender.py:send_metric():1343] Seen metric with glob (shouldn't happen)
48
+ 2024-02-01 17:38:34,555 DEBUG SenderThread:237521 [sender.py:send():382] send: telemetry
49
+ 2024-02-01 17:38:34,721 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/conda-environment.yaml
50
+ 2024-02-01 17:38:34,722 INFO Thread-12 :237521 [dir_watcher.py:_on_file_created():271] file/dir created: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-metadata.json
51
+ 2024-02-01 17:38:34,722 INFO Thread-12 :237521 [dir_watcher.py:_on_file_created():271] file/dir created: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
52
+ 2024-02-01 17:38:34,796 INFO wandb-upload_0:237521 [upload_job.py:push():131] Uploaded file /tmp/tmpfs5f2n7fwandb/421ry27q-wandb-metadata.json
53
+ 2024-02-01 17:38:36,724 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
54
+ 2024-02-01 17:38:39,559 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
55
+ 2024-02-01 17:38:42,110 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
56
+ 2024-02-01 17:38:42,112 DEBUG SenderThread:237521 [sender.py:send():382] send: metric
57
+ 2024-02-01 17:38:42,112 DEBUG SenderThread:237521 [sender.py:send():382] send: metric
58
+ 2024-02-01 17:38:42,112 DEBUG SenderThread:237521 [sender.py:send():382] send: metric
59
+ 2024-02-01 17:38:42,112 DEBUG SenderThread:237521 [sender.py:send():382] send: history
60
+ 2024-02-01 17:38:42,113 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
61
+ 2024-02-01 17:38:42,116 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
62
+ 2024-02-01 17:38:42,732 INFO Thread-12 :237521 [dir_watcher.py:_on_file_created():271] file/dir created: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
63
+ 2024-02-01 17:38:44,735 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
64
+ 2024-02-01 17:38:44,770 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
65
+ 2024-02-01 17:38:45,673 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
66
+ 2024-02-01 17:38:45,674 DEBUG SenderThread:237521 [sender.py:send():382] send: history
67
+ 2024-02-01 17:38:45,674 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
68
+ 2024-02-01 17:38:45,676 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
69
+ 2024-02-01 17:38:45,737 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
70
+ 2024-02-01 17:38:46,738 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
71
+ 2024-02-01 17:38:48,741 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
72
+ 2024-02-01 17:38:49,465 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
73
+ 2024-02-01 17:38:49,465 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
74
+ 2024-02-01 17:38:49,467 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
75
+ 2024-02-01 17:38:50,190 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
76
+ 2024-02-01 17:38:50,230 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
77
+ 2024-02-01 17:38:50,232 DEBUG SenderThread:237521 [sender.py:send():382] send: history
78
+ 2024-02-01 17:38:50,232 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
79
+ 2024-02-01 17:38:50,234 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
80
+ 2024-02-01 17:38:50,745 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
81
+ 2024-02-01 17:38:50,745 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
82
+ 2024-02-01 17:38:52,747 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
83
+ 2024-02-01 17:38:54,715 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
84
+ 2024-02-01 17:38:54,716 DEBUG SenderThread:237521 [sender.py:send():382] send: history
85
+ 2024-02-01 17:38:54,716 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
86
+ 2024-02-01 17:38:54,718 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
87
+ 2024-02-01 17:38:54,751 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
88
+ 2024-02-01 17:38:54,751 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
89
+ 2024-02-01 17:38:55,617 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
90
+ 2024-02-01 17:38:56,753 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
91
+ 2024-02-01 17:38:58,756 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
92
+ 2024-02-01 17:38:59,239 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
93
+ 2024-02-01 17:38:59,240 DEBUG SenderThread:237521 [sender.py:send():382] send: history
94
+ 2024-02-01 17:38:59,240 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
95
+ 2024-02-01 17:38:59,242 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
96
+ 2024-02-01 17:38:59,758 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
97
+ 2024-02-01 17:39:00,760 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
98
+ 2024-02-01 17:39:01,049 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
99
+ 2024-02-01 17:39:01,762 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/config.yaml
100
+ 2024-02-01 17:39:02,763 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
101
+ 2024-02-01 17:39:03,754 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
102
+ 2024-02-01 17:39:03,755 DEBUG SenderThread:237521 [sender.py:send():382] send: history
103
+ 2024-02-01 17:39:03,755 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
104
+ 2024-02-01 17:39:03,757 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
105
+ 2024-02-01 17:39:03,766 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
106
+ 2024-02-01 17:39:04,465 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
107
+ 2024-02-01 17:39:04,466 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
108
+ 2024-02-01 17:39:04,466 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
109
+ 2024-02-01 17:39:04,767 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
110
+ 2024-02-01 17:39:06,478 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
111
+ 2024-02-01 17:39:06,770 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
112
+ 2024-02-01 17:39:08,293 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
113
+ 2024-02-01 17:39:08,294 DEBUG SenderThread:237521 [sender.py:send():382] send: history
114
+ 2024-02-01 17:39:08,295 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
115
+ 2024-02-01 17:39:08,297 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
116
+ 2024-02-01 17:39:08,773 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
117
+ 2024-02-01 17:39:08,774 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
118
+ 2024-02-01 17:39:10,776 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
119
+ 2024-02-01 17:39:11,923 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
120
+ 2024-02-01 17:39:12,779 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
121
+ 2024-02-01 17:39:12,828 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
122
+ 2024-02-01 17:39:12,830 DEBUG SenderThread:237521 [sender.py:send():382] send: history
123
+ 2024-02-01 17:39:12,830 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
124
+ 2024-02-01 17:39:12,832 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
125
+ 2024-02-01 17:39:13,781 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
126
+ 2024-02-01 17:39:14,782 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
127
+ 2024-02-01 17:39:16,785 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
128
+ 2024-02-01 17:39:17,363 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
129
+ 2024-02-01 17:39:17,365 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
130
+ 2024-02-01 17:39:17,366 DEBUG SenderThread:237521 [sender.py:send():382] send: history
131
+ 2024-02-01 17:39:17,367 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
132
+ 2024-02-01 17:39:17,368 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
133
+ 2024-02-01 17:39:17,788 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
134
+ 2024-02-01 17:39:18,789 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
135
+ 2024-02-01 17:39:19,465 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
136
+ 2024-02-01 17:39:19,466 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
137
+ 2024-02-01 17:39:19,467 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
138
+ 2024-02-01 17:39:20,792 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
139
+ 2024-02-01 17:39:21,909 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
140
+ 2024-02-01 17:39:21,911 DEBUG SenderThread:237521 [sender.py:send():382] send: history
141
+ 2024-02-01 17:39:21,911 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
142
+ 2024-02-01 17:39:21,913 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
143
+ 2024-02-01 17:39:22,795 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
144
+ 2024-02-01 17:39:22,796 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
145
+ 2024-02-01 17:39:22,815 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
146
+ 2024-02-01 17:39:24,798 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
147
+ 2024-02-01 17:39:26,448 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
148
+ 2024-02-01 17:39:26,450 DEBUG SenderThread:237521 [sender.py:send():382] send: history
149
+ 2024-02-01 17:39:26,450 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
150
+ 2024-02-01 17:39:26,452 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
151
+ 2024-02-01 17:39:26,802 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
152
+ 2024-02-01 17:39:26,802 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
153
+ 2024-02-01 17:39:28,269 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
154
+ 2024-02-01 17:39:28,805 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
155
+ 2024-02-01 17:39:28,836 DEBUG SystemMonitor:237521 [system_monitor.py:_start():172] Starting system metrics aggregation loop
156
+ 2024-02-01 17:39:28,850 DEBUG SenderThread:237521 [sender.py:send():382] send: stats
157
+ 2024-02-01 17:39:30,807 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
158
+ 2024-02-01 17:39:31,001 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
159
+ 2024-02-01 17:39:31,003 DEBUG SenderThread:237521 [sender.py:send():382] send: history
160
+ 2024-02-01 17:39:31,003 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
161
+ 2024-02-01 17:39:31,005 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
162
+ 2024-02-01 17:39:31,810 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
163
+ 2024-02-01 17:39:32,811 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
164
+ 2024-02-01 17:39:33,729 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
165
+ 2024-02-01 17:39:34,465 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
166
+ 2024-02-01 17:39:34,465 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
167
+ 2024-02-01 17:39:34,468 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
168
+ 2024-02-01 17:39:34,814 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
169
+ 2024-02-01 17:39:35,548 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
170
+ 2024-02-01 17:39:35,550 DEBUG SenderThread:237521 [sender.py:send():382] send: history
171
+ 2024-02-01 17:39:35,550 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
172
+ 2024-02-01 17:39:35,552 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
173
+ 2024-02-01 17:39:35,816 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
174
+ 2024-02-01 17:39:36,817 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
175
+ 2024-02-01 17:39:38,820 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
176
+ 2024-02-01 17:39:39,188 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
177
+ 2024-02-01 17:39:40,104 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
178
+ 2024-02-01 17:39:40,105 DEBUG SenderThread:237521 [sender.py:send():382] send: history
179
+ 2024-02-01 17:39:40,105 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
180
+ 2024-02-01 17:39:40,108 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
181
+ 2024-02-01 17:39:40,824 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
182
+ 2024-02-01 17:39:40,824 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
183
+ 2024-02-01 17:39:42,826 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
184
+ 2024-02-01 17:39:44,651 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
185
+ 2024-02-01 17:39:44,652 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
186
+ 2024-02-01 17:39:44,653 DEBUG SenderThread:237521 [sender.py:send():382] send: history
187
+ 2024-02-01 17:39:44,653 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
188
+ 2024-02-01 17:39:44,655 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
189
+ 2024-02-01 17:39:44,830 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
190
+ 2024-02-01 17:39:44,830 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
191
+ 2024-02-01 17:39:46,833 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
192
+ 2024-02-01 17:39:48,835 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
193
+ 2024-02-01 17:39:49,211 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
194
+ 2024-02-01 17:39:49,212 DEBUG SenderThread:237521 [sender.py:send():382] send: history
195
+ 2024-02-01 17:39:49,212 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
196
+ 2024-02-01 17:39:49,214 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
197
+ 2024-02-01 17:39:49,466 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
198
+ 2024-02-01 17:39:49,466 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
199
+ 2024-02-01 17:39:49,467 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
200
+ 2024-02-01 17:39:49,838 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
201
+ 2024-02-01 17:39:50,121 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
202
+ 2024-02-01 17:39:50,839 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
203
+ 2024-02-01 17:39:52,842 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
204
+ 2024-02-01 17:39:53,762 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
205
+ 2024-02-01 17:39:53,763 DEBUG SenderThread:237521 [sender.py:send():382] send: history
206
+ 2024-02-01 17:39:53,763 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
207
+ 2024-02-01 17:39:53,765 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
208
+ 2024-02-01 17:39:53,844 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
209
+ 2024-02-01 17:39:54,845 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
210
+ 2024-02-01 17:39:55,580 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
211
+ 2024-02-01 17:39:56,848 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
212
+ 2024-02-01 17:39:58,314 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
213
+ 2024-02-01 17:39:58,316 DEBUG SenderThread:237521 [sender.py:send():382] send: history
214
+ 2024-02-01 17:39:58,316 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
215
+ 2024-02-01 17:39:58,318 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
216
+ 2024-02-01 17:39:58,842 DEBUG SenderThread:237521 [sender.py:send():382] send: stats
217
+ 2024-02-01 17:39:58,852 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
218
+ 2024-02-01 17:39:58,852 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
219
+ 2024-02-01 17:40:00,854 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
220
+ 2024-02-01 17:40:01,035 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
221
+ 2024-02-01 17:40:02,853 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
222
+ 2024-02-01 17:40:02,855 DEBUG SenderThread:237521 [sender.py:send():382] send: history
223
+ 2024-02-01 17:40:02,855 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
224
+ 2024-02-01 17:40:02,857 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
225
+ 2024-02-01 17:40:02,858 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
226
+ 2024-02-01 17:40:02,859 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
227
+ 2024-02-01 17:40:04,465 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
228
+ 2024-02-01 17:40:04,466 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
229
+ 2024-02-01 17:40:04,467 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
230
+ 2024-02-01 17:40:04,861 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
231
+ 2024-02-01 17:40:06,498 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
232
+ 2024-02-01 17:40:06,864 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
233
+ 2024-02-01 17:40:07,408 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
234
+ 2024-02-01 17:40:07,409 DEBUG SenderThread:237521 [sender.py:send():382] send: history
235
+ 2024-02-01 17:40:07,409 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
236
+ 2024-02-01 17:40:07,411 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
237
+ 2024-02-01 17:40:07,866 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
238
+ 2024-02-01 17:40:08,867 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
239
+ 2024-02-01 17:40:10,870 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
240
+ 2024-02-01 17:40:11,953 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
241
+ 2024-02-01 17:40:11,954 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
242
+ 2024-02-01 17:40:11,955 DEBUG SenderThread:237521 [sender.py:send():382] send: history
243
+ 2024-02-01 17:40:11,956 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
244
+ 2024-02-01 17:40:11,958 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
245
+ 2024-02-01 17:40:12,874 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
246
+ 2024-02-01 17:40:12,874 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
247
+ 2024-02-01 17:40:14,876 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
248
+ 2024-02-01 17:40:16,879 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
249
+ 2024-02-01 17:40:17,215 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
250
+ 2024-02-01 17:40:18,882 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
251
+ 2024-02-01 17:40:19,958 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
252
+ 2024-02-01 17:40:19,959 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
253
+ 2024-02-01 17:40:19,959 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
254
+ 2024-02-01 17:40:22,278 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
255
+ 2024-02-01 17:40:22,888 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
256
+ 2024-02-01 17:40:24,892 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
257
+ 2024-02-01 17:40:26,895 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
258
+ 2024-02-01 17:40:27,568 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
259
+ 2024-02-01 17:40:27,569 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
260
+ 2024-02-01 17:40:27,571 DEBUG SenderThread:237521 [sender.py:send():382] send: history
261
+ 2024-02-01 17:40:27,571 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
262
+ 2024-02-01 17:40:27,573 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
263
+ 2024-02-01 17:40:27,897 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
264
+ 2024-02-01 17:40:28,844 DEBUG SenderThread:237521 [sender.py:send():382] send: stats
265
+ 2024-02-01 17:40:28,898 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
266
+ 2024-02-01 17:40:30,901 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
267
+ 2024-02-01 17:40:32,115 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
268
+ 2024-02-01 17:40:32,117 DEBUG SenderThread:237521 [sender.py:send():382] send: history
269
+ 2024-02-01 17:40:32,117 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
270
+ 2024-02-01 17:40:32,119 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
271
+ 2024-02-01 17:40:32,904 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
272
+ 2024-02-01 17:40:32,904 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
273
+ 2024-02-01 17:40:33,026 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
274
+ 2024-02-01 17:40:34,907 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
275
+ 2024-02-01 17:40:34,920 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
276
+ 2024-02-01 17:40:34,920 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
277
+ 2024-02-01 17:40:34,957 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
278
+ 2024-02-01 17:40:36,665 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
279
+ 2024-02-01 17:40:36,667 DEBUG SenderThread:237521 [sender.py:send():382] send: history
280
+ 2024-02-01 17:40:36,667 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
281
+ 2024-02-01 17:40:36,669 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
282
+ 2024-02-01 17:40:36,910 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
283
+ 2024-02-01 17:40:36,911 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
284
+ 2024-02-01 17:40:38,487 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
285
+ 2024-02-01 17:40:38,913 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
286
+ 2024-02-01 17:40:40,915 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
287
+ 2024-02-01 17:40:41,219 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
288
+ 2024-02-01 17:40:41,220 DEBUG SenderThread:237521 [sender.py:send():382] send: history
289
+ 2024-02-01 17:40:41,221 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
290
+ 2024-02-01 17:40:41,223 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
291
+ 2024-02-01 17:40:41,917 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
292
+ 2024-02-01 17:40:42,919 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
293
+ 2024-02-01 17:40:43,949 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
294
+ 2024-02-01 17:40:44,922 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
295
+ 2024-02-01 17:40:45,773 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
296
+ 2024-02-01 17:40:45,775 DEBUG SenderThread:237521 [sender.py:send():382] send: history
297
+ 2024-02-01 17:40:45,776 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
298
+ 2024-02-01 17:40:45,778 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
299
+ 2024-02-01 17:40:45,924 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
300
+ 2024-02-01 17:40:46,925 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
301
+ 2024-02-01 17:40:48,927 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
302
+ 2024-02-01 17:40:49,410 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
303
+ 2024-02-01 17:40:49,920 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
304
+ 2024-02-01 17:40:49,920 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
305
+ 2024-02-01 17:40:49,957 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
306
+ 2024-02-01 17:40:50,362 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
307
+ 2024-02-01 17:40:50,930 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
308
+ 2024-02-01 17:40:51,403 DEBUG SenderThread:237521 [sender.py:send():382] send: history
309
+ 2024-02-01 17:40:51,403 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
310
+ 2024-02-01 17:40:51,405 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
311
+ 2024-02-01 17:40:51,932 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
312
+ 2024-02-01 17:40:52,934 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
313
+ 2024-02-01 17:40:54,873 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
314
+ 2024-02-01 17:40:54,874 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
315
+ 2024-02-01 17:40:54,876 DEBUG SenderThread:237521 [sender.py:send():382] send: history
316
+ 2024-02-01 17:40:54,876 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
317
+ 2024-02-01 17:40:54,878 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
318
+ 2024-02-01 17:40:54,937 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
319
+ 2024-02-01 17:40:54,937 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
320
+ 2024-02-01 17:40:56,940 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
321
+ 2024-02-01 17:40:58,846 DEBUG SenderThread:237521 [sender.py:send():382] send: stats
322
+ 2024-02-01 17:40:58,942 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
323
+ 2024-02-01 17:40:59,420 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
324
+ 2024-02-01 17:40:59,422 DEBUG SenderThread:237521 [sender.py:send():382] send: history
325
+ 2024-02-01 17:40:59,422 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
326
+ 2024-02-01 17:40:59,424 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
327
+ 2024-02-01 17:40:59,944 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
328
+ 2024-02-01 17:41:00,330 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
329
+ 2024-02-01 17:41:00,946 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
330
+ 2024-02-01 17:41:02,948 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
331
+ 2024-02-01 17:41:03,975 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
332
+ 2024-02-01 17:41:03,976 DEBUG SenderThread:237521 [sender.py:send():382] send: history
333
+ 2024-02-01 17:41:03,976 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
334
+ 2024-02-01 17:41:03,978 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
335
+ 2024-02-01 17:41:04,920 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
336
+ 2024-02-01 17:41:04,920 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
337
+ 2024-02-01 17:41:04,952 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
338
+ 2024-02-01 17:41:04,952 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
339
+ 2024-02-01 17:41:04,957 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
340
+ 2024-02-01 17:41:05,797 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
341
+ 2024-02-01 17:41:06,955 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
342
+ 2024-02-01 17:41:08,527 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
343
+ 2024-02-01 17:41:08,529 DEBUG SenderThread:237521 [sender.py:send():382] send: history
344
+ 2024-02-01 17:41:08,529 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
345
+ 2024-02-01 17:41:08,531 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
346
+ 2024-02-01 17:41:08,958 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
347
+ 2024-02-01 17:41:08,959 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
348
+ 2024-02-01 17:41:10,961 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
349
+ 2024-02-01 17:41:11,264 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
350
+ 2024-02-01 17:41:12,964 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
351
+ 2024-02-01 17:41:13,085 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
352
+ 2024-02-01 17:41:13,086 DEBUG SenderThread:237521 [sender.py:send():382] send: history
353
+ 2024-02-01 17:41:13,087 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
354
+ 2024-02-01 17:41:13,089 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
355
+ 2024-02-01 17:41:13,966 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
356
+ 2024-02-01 17:41:14,967 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
357
+ 2024-02-01 17:41:16,736 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
358
+ 2024-02-01 17:41:16,969 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
359
+ 2024-02-01 17:41:17,649 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
360
+ 2024-02-01 17:41:17,650 DEBUG SenderThread:237521 [sender.py:send():382] send: history
361
+ 2024-02-01 17:41:17,651 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
362
+ 2024-02-01 17:41:17,653 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
363
+ 2024-02-01 17:41:17,972 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
364
+ 2024-02-01 17:41:18,973 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
365
+ 2024-02-01 17:41:19,920 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
366
+ 2024-02-01 17:41:19,920 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
367
+ 2024-02-01 17:41:19,957 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
368
+ 2024-02-01 17:41:20,976 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
369
+ 2024-02-01 17:41:22,234 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
370
+ 2024-02-01 17:41:22,235 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
371
+ 2024-02-01 17:41:22,236 DEBUG SenderThread:237521 [sender.py:send():382] send: history
372
+ 2024-02-01 17:41:22,236 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
373
+ 2024-02-01 17:41:22,238 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
374
+ 2024-02-01 17:41:22,979 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
375
+ 2024-02-01 17:41:22,979 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
376
+ 2024-02-01 17:41:24,981 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
377
+ 2024-02-01 17:41:26,803 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
378
+ 2024-02-01 17:41:26,804 DEBUG SenderThread:237521 [sender.py:send():382] send: history
379
+ 2024-02-01 17:41:26,805 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
380
+ 2024-02-01 17:41:26,806 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
381
+ 2024-02-01 17:41:26,985 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
382
+ 2024-02-01 17:41:26,985 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
383
+ 2024-02-01 17:41:27,718 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
384
+ 2024-02-01 17:41:28,848 DEBUG SenderThread:237521 [sender.py:send():382] send: stats
385
+ 2024-02-01 17:41:28,987 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
wandb/run-20240201_173828-py26nu6m/logs/debug.log ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Current SDK version is 0.16.1
2
+ 2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Configure stats pid to 237059
3
+ 2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Loading settings from /admin/home/sanchit/.config/wandb/settings
4
+ 2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Loading settings from /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/settings
5
+ 2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
6
+ 2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
+ 2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'run_sft.py', 'program_abspath': '/fsx/sanchit/distil-zephyr-1.5b-ssft/run_sft.py', 'program': '/fsx/sanchit/distil-zephyr-1.5b-ssft/run_sft.py'}
8
+ 2024-02-01 17:38:28,435 INFO MainThread:237059 [wandb_init.py:_log_setup():524] Logging user logs to /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/logs/debug.log
9
+ 2024-02-01 17:38:28,435 INFO MainThread:237059 [wandb_init.py:_log_setup():525] Logging internal logs to /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/logs/debug-internal.log
10
+ 2024-02-01 17:38:28,435 INFO MainThread:237059 [wandb_init.py:init():564] calling init triggers
11
+ 2024-02-01 17:38:28,435 INFO MainThread:237059 [wandb_init.py:init():571] wandb.init called with sweep_config: {}
12
+ config: {}
13
+ 2024-02-01 17:38:28,435 INFO MainThread:237059 [wandb_init.py:init():614] starting backend
14
+ 2024-02-01 17:38:28,435 INFO MainThread:237059 [wandb_init.py:init():618] setting up manager
15
+ 2024-02-01 17:38:28,441 INFO MainThread:237059 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
16
+ 2024-02-01 17:38:28,451 INFO MainThread:237059 [wandb_init.py:init():624] backend started and connected
17
+ 2024-02-01 17:38:28,453 INFO MainThread:237059 [wandb_init.py:init():716] updated telemetry
18
+ 2024-02-01 17:38:28,475 INFO MainThread:237059 [wandb_init.py:init():749] communicating run to backend with 90.0 second timeout
19
+ 2024-02-01 17:38:28,720 INFO MainThread:237059 [wandb_run.py:_on_init():2254] communicating current version
20
+ 2024-02-01 17:38:28,767 INFO MainThread:237059 [wandb_run.py:_on_init():2263] got version response upgrade_message: "wandb version 0.16.2 is available! To upgrade, please run:\n $ pip install wandb --upgrade"
21
+
22
+ 2024-02-01 17:38:28,767 INFO MainThread:237059 [wandb_init.py:init():800] starting run threads in backend
23
+ 2024-02-01 17:38:34,465 INFO MainThread:237059 [wandb_run.py:_console_start():2233] atexit reg
24
+ 2024-02-01 17:38:34,465 INFO MainThread:237059 [wandb_run.py:_redirect():2088] redirect: wrap_raw
25
+ 2024-02-01 17:38:34,466 INFO MainThread:237059 [wandb_run.py:_redirect():2153] Wrapping output streams.
26
+ 2024-02-01 17:38:34,466 INFO MainThread:237059 [wandb_run.py:_redirect():2178] Redirects installed.
27
+ 2024-02-01 17:38:34,467 INFO MainThread:237059 [wandb_init.py:init():841] run started, returning control to user process
28
+ 2024-02-01 17:38:34,468 INFO MainThread:237059 [wandb_run.py:_config_callback():1342] config_cb None None {'vocab_size': 32000, 'max_position_embeddings': 32768, 'hidden_size': 4096, 'intermediate_size': 14336, 'num_hidden_layers': 6, 'num_attention_heads': 32, 'sliding_window': 4096, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'use_cache': False, 'rope_theta': 10000.0, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['MistralForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': None, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'sanchit-gandhi/Mistral-7B-v0.1-6-layer', 'transformers_version': '4.36.2', 'model_type': 'mistral', 'output_dir': './', 'overwrite_output_dir': 
True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.1, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb01_17-38-02_ip-26-0-165-24', 'logging_strategy': 'steps', 'logging_first_step': True, 'logging_steps': 5, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': 
False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': {'use_reentrant': False}, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'max_seq_length': 2048}
wandb/run-20240201_173828-py26nu6m/run-py26nu6m.wandb ADDED
Binary file (98.4 kB). View file