dwivedi-rishabh commited on
Commit
c369870
·
verified ·
1 Parent(s): 8a9b32c

Upload folder using huggingface_hub

Browse files
args.yaml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ batch_size: 1
2
+ checkpoint: true
3
+ ckpt_freq: 10
4
+ data:
5
+ data: ''
6
+ eval_instruct_data: ./data/t_eval.jsonl
7
+ instruct:
8
+ dynamic_chunk_fn_call: true
9
+ shuffle: true
10
+ instruct_data: ./data/t_train.jsonl
11
+ shuffle: false
12
+ eval_freq: 5
13
+ log_freq: 1
14
+ lora:
15
+ dropout: 0.05
16
+ enable: true
17
+ rank: 16
18
+ scaling: 2.0
19
+ max_norm: 1.0
20
+ max_steps: 10
21
+ mlflow:
22
+ experiment_name: null
23
+ tracking_uri: null
24
+ model_id_or_path: ../downloaded_model/mistral_models/7B-v0.3/
25
+ no_ckpt: false
26
+ no_eval: false
27
+ num_ckpt_keep: 3
28
+ num_microbatches: 4
29
+ optim:
30
+ lr: 0.0001
31
+ pct_start: 0.05
32
+ weight_decay: 0.1
33
+ run_dir: ./content/test_ultra_1
34
+ save_adapters: false
35
+ seed: 0
36
+ seq_len: 1024
37
+ wandb:
38
+ key: 9e930bac040e715c4be9de064318956067b45479
39
+ offline: false
40
+ project: finetuning_mistral
41
+ run_name: null
42
+ world_size: 1
checkpoints/checkpoint_000010/consolidated/consolidated.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1168a65dcc1f06d0371b8a2834c8936502becc2dab70ef51f56f6ac9e8eaa05
3
+ size 14496078512
checkpoints/checkpoint_000010/consolidated/params.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dim": 4096,
3
+ "n_layers": 32,
4
+ "head_dim": 128,
5
+ "hidden_dim": 14336,
6
+ "n_heads": 32,
7
+ "n_kv_heads": 8,
8
+ "norm_eps": 1e-05,
9
+ "vocab_size": 32768,
10
+ "rope_theta": 1000000.0,
11
+ "lora": {
12
+ "enable": true,
13
+ "rank": 16,
14
+ "dropout": 0.05,
15
+ "scaling": 2.0
16
+ },
17
+ "moe": null
18
+ }
checkpoints/checkpoint_000010/consolidated/tokenizer.model.v3 ADDED
Binary file (588 kB). View file
 
metrics.eval.jsonl ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ {"step": 5, "train_loss": 0.30592161417007446, "perplexity": 1.260413646697998, "eval_loss": 0.33389732241630554, "at": "2024-09-01T08:08:52.332862"}
2
+ {"step": 10, "train_loss": 0.108039490878582, "perplexity": 1.1342318058013916, "eval_loss": 0.18171557784080505, "at": "2024-09-01T08:09:07.247441"}
metrics.train.jsonl ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {"lr": 9.931806789787545e-05, "step": 1, "loss": 1.1353108882904053, "percent_done": 10.0, "peak_allocated_mem": 14.656035900115967, "allocated_mem": 14.327418327331543, "wps": 1177.8248031910382, "avg_wps": 1177.8248031910382, "eta_in_seconds": 31.298372983932495, "at": "2024-09-01T08:08:40.093343"}
2
+ {"lr": 9.397371166557421e-05, "step": 2, "loss": 0.882387101650238, "percent_done": 20.0, "peak_allocated_mem": 14.968535900115967, "allocated_mem": 14.327418327331543, "wps": 1420.5461474296683, "avg_wps": 1287.848823987544, "eta_in_seconds": 25.44398021697998, "at": "2024-09-01T08:08:42.978158"}
3
+ {"lr": 8.386414312497274e-05, "step": 3, "loss": 0.7568908929824829, "percent_done": 30.0, "peak_allocated_mem": 14.968535900115967, "allocated_mem": 14.327418327331543, "wps": 1376.892615343285, "avg_wps": 1316.2222344521215, "eta_in_seconds": 21.78355542818705, "at": "2024-09-01T08:08:45.954286"}
4
+ {"lr": 7.008489089356355e-05, "step": 4, "loss": 0.4648980498313904, "percent_done": 40.0, "peak_allocated_mem": 14.968535900115967, "allocated_mem": 14.327418327331543, "wps": 1424.7641809898853, "avg_wps": 1341.7771840572461, "eta_in_seconds": 18.316006779670715, "at": "2024-09-01T08:08:48.830255"}
5
+ {"lr": 5.412915075774753e-05, "step": 5, "loss": 0.30592161417007446, "percent_done": 50.0, "peak_allocated_mem": 14.968535900115967, "allocated_mem": 14.327418327331543, "wps": 1169.4011792663457, "avg_wps": 1303.3529414843938, "eta_in_seconds": 15.713318586349487, "at": "2024-09-01T08:08:52.333812"}
6
+ {"lr": 3.772597474005748e-05, "step": 6, "loss": 0.22509340941905975, "percent_done": 60.0, "peak_allocated_mem": 14.968535900115967, "allocated_mem": 14.327418327331543, "wps": 1470.749009377081, "avg_wps": 1328.554938478532, "eta_in_seconds": 12.33219607671102, "at": "2024-09-01T08:08:55.120107"}
7
+ {"lr": 2.265290148351029e-05, "step": 7, "loss": 0.16402409970760345, "percent_done": 70.0, "peak_allocated_mem": 14.968535900115967, "allocated_mem": 14.327418327331543, "wps": 1418.1967238745826, "avg_wps": 1340.6607771976503, "eta_in_seconds": 9.165629523141043, "at": "2024-09-01T08:08:58.009620"}
8
+ {"lr": 1.0543332358282206e-05, "step": 8, "loss": 0.12998557090759277, "percent_done": 80.0, "peak_allocated_mem": 14.968535900115967, "allocated_mem": 14.327418327331543, "wps": 1411.0965588366037, "avg_wps": 1349.0782897373429, "eta_in_seconds": 6.072293996810913, "at": "2024-09-01T08:09:00.913464"}
9
+ {"lr": 2.7095270784166083e-06, "step": 9, "loss": 0.15246960520744324, "percent_done": 90.0, "peak_allocated_mem": 14.968535900115967, "allocated_mem": 14.327418327331543, "wps": 1411.9670613026851, "avg_wps": 1355.7879077002867, "eta_in_seconds": 3.0211215019226074, "at": "2024-09-01T08:09:03.815685"}
10
+ {"lr": 3.9999999999999996e-10, "step": 10, "loss": 0.108039490878582, "percent_done": 100.0, "peak_allocated_mem": 14.968535900115967, "allocated_mem": 14.327418327331543, "wps": 1193.533250331889, "avg_wps": 1337.6038764958669, "eta_in_seconds": 0.0, "at": "2024-09-01T08:09:07.248545"}
tb/events.out.tfevents.1725178109.f6e183d76cb1.1234.0.train ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46ad0922957fe7b47b48fe24c04a06c063ea0de82bc87a409ef40df49995711d
3
+ size 4338
tb/events.out.tfevents.1725178110.f6e183d76cb1.1234.1.eval ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:692f396a6274e05cbc16f1af6e15d41684082b2c5192ddae6c185fef0bcd3c2f
3
+ size 404
wandb/debug-internal.log ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-09-01 08:08:30,433 INFO StreamThr :1314 [internal.py:wandb_internal():85] W&B internal server running at pid: 1314, started at: 2024-09-01 08:08:30.432847
2
+ 2024-09-01 08:08:30,434 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: status
3
+ 2024-09-01 08:08:30,436 INFO WriterThread:1314 [datastore.py:open_for_write():87] open: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/run-ra4ttuu9.wandb
4
+ 2024-09-01 08:08:30,437 DEBUG SenderThread:1314 [sender.py:send():391] send: header
5
+ 2024-09-01 08:08:30,442 DEBUG SenderThread:1314 [sender.py:send():391] send: run
6
+ 2024-09-01 08:08:30,661 INFO SenderThread:1314 [dir_watcher.py:__init__():211] watching files in: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files
7
+ 2024-09-01 08:08:30,661 INFO SenderThread:1314 [sender.py:_start_run_threads():1200] run started: ra4ttuu9 with start time 1725178110.432525
8
+ 2024-09-01 08:08:30,668 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: run_start
9
+ 2024-09-01 08:08:30,682 DEBUG HandlerThread:1314 [system_info.py:__init__():26] System info init
10
+ 2024-09-01 08:08:30,682 DEBUG HandlerThread:1314 [system_info.py:__init__():41] System info init done
11
+ 2024-09-01 08:08:30,682 INFO HandlerThread:1314 [system_monitor.py:start():194] Starting system monitor
12
+ 2024-09-01 08:08:30,683 INFO SystemMonitor:1314 [system_monitor.py:_start():158] Starting system asset monitoring threads
13
+ 2024-09-01 08:08:30,683 INFO HandlerThread:1314 [system_monitor.py:probe():214] Collecting system info
14
+ 2024-09-01 08:08:30,684 INFO SystemMonitor:1314 [interfaces.py:start():188] Started cpu monitoring
15
+ 2024-09-01 08:08:30,684 INFO SystemMonitor:1314 [interfaces.py:start():188] Started disk monitoring
16
+ 2024-09-01 08:08:30,685 INFO SystemMonitor:1314 [interfaces.py:start():188] Started gpu monitoring
17
+ 2024-09-01 08:08:30,686 INFO SystemMonitor:1314 [interfaces.py:start():188] Started memory monitoring
18
+ 2024-09-01 08:08:30,686 INFO SystemMonitor:1314 [interfaces.py:start():188] Started network monitoring
19
+ 2024-09-01 08:08:30,709 DEBUG HandlerThread:1314 [system_info.py:probe():152] Probing system
20
+ 2024-09-01 08:08:30,711 DEBUG HandlerThread:1314 [system_info.py:_probe_git():137] Probing git
21
+ 2024-09-01 08:08:30,718 DEBUG HandlerThread:1314 [system_info.py:_probe_git():145] Probing git done
22
+ 2024-09-01 08:08:30,718 DEBUG HandlerThread:1314 [system_info.py:probe():200] Probing system done
23
+ 2024-09-01 08:08:30,718 DEBUG HandlerThread:1314 [system_monitor.py:probe():223] {'os': 'Linux-6.5.0-35-generic-x86_64-with-glibc2.35', 'python': '3.10.12', 'heartbeatAt': '2024-09-01T08:08:30.709894', 'startedAt': '2024-09-01T08:08:30.425931', 'docker': None, 'cuda': None, 'args': ('example.yaml',), 'state': 'running', 'program': '-m train', 'codePathLocal': None, 'git': {'remote': 'https://github.com/mistralai/mistral-finetune.git', 'commit': '0b0eaac415ba6317e198aa2207c7bd4b3973adbe'}, 'email': None, 'root': '/workspace/mistral-finetune', 'host': 'f6e183d76cb1', 'username': 'root', 'executable': '/usr/bin/python', 'cpu_count': 32, 'cpu_count_logical': 64, 'cpu_freq': {'current': 1976.101125, 'min': 1500.0, 'max': 3200.0}, 'cpu_freq_per_core': [{'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 3925.001, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 2300.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 3200.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 2773.255, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 3200.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 2221.478, 'min': 1500.0, 'max': 3200.0}, {'current': 2062.966, 'min': 1500.0, 'max': 3200.0}, {'current': 3200.0, 'min': 1500.0, 'max': 3200.0}, {'current': 2111.787, 'min': 1500.0, 'max': 3200.0}, {'current': 3925.001, 'min': 1500.0, 'max': 3200.0}, {'current': 3137.781, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 3200.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1499.733, 'min': 1500.0, 'max': 3200.0}, {'current': 2276.722, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1606.878, 'min': 1500.0, 'max': 3200.0}, {'current': 3120.917, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 3924.981, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1916.093, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 3924.985, 'min': 1500.0, 'max': 3200.0}, {'current': 3200.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1798.162, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 3200.0, 'min': 1500.0, 'max': 3200.0}, {'current': 3126.511, 'min': 1500.0, 'max': 3200.0}, {'current': 3200.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 3139.064, 'min': 1500.0, 'max': 3200.0}, {'current': 3137.864, 'min': 1500.0, 'max': 3200.0}], 'disk': {'/': {'total': 50.0, 'used': 4.066852569580078}}, 'gpu': 'NVIDIA RTX A6000', 'gpu_count': 1, 'gpu_devices': [{'name': 'NVIDIA RTX A6000', 'memory_total': 51527024640}], 'memory': {'total': 503.7315444946289}}
24
+ 2024-09-01 08:08:30,718 INFO HandlerThread:1314 [system_monitor.py:probe():224] Finished collecting system info
25
+ 2024-09-01 08:08:30,718 INFO HandlerThread:1314 [system_monitor.py:probe():227] Publishing system info
26
+ 2024-09-01 08:08:30,719 INFO HandlerThread:1314 [system_monitor.py:probe():229] Finished publishing system info
27
+ 2024-09-01 08:08:30,724 DEBUG SenderThread:1314 [sender.py:send():391] send: files
28
+ 2024-09-01 08:08:30,724 INFO SenderThread:1314 [sender.py:_save_file():1466] saving file wandb-metadata.json with policy now
29
+ 2024-09-01 08:08:30,806 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: python_packages
30
+ 2024-09-01 08:08:30,806 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: python_packages
31
+ 2024-09-01 08:08:30,807 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: stop_status
32
+ 2024-09-01 08:08:30,808 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: stop_status
33
+ 2024-09-01 08:08:30,809 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: internal_messages
34
+ 2024-09-01 08:08:31,023 DEBUG SenderThread:1314 [sender.py:send():391] send: telemetry
35
+ 2024-09-01 08:08:31,143 INFO wandb-upload_0:1314 [upload_job.py:push():130] Uploaded file /tmp/tmp53jjcldxwandb/h8od4rgj-wandb-metadata.json
36
+ 2024-09-01 08:08:31,662 INFO Thread-12 :1314 [dir_watcher.py:_on_file_created():271] file/dir created: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-metadata.json
37
+ 2024-09-01 08:08:31,662 INFO Thread-12 :1314 [dir_watcher.py:_on_file_created():271] file/dir created: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
38
+ 2024-09-01 08:08:31,662 INFO Thread-12 :1314 [dir_watcher.py:_on_file_created():271] file/dir created: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/requirements.txt
39
+ 2024-09-01 08:08:33,662 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
40
+ 2024-09-01 08:08:35,486 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: status_report
41
+ 2024-09-01 08:08:37,663 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
42
+ 2024-09-01 08:08:39,664 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
43
+ 2024-09-01 08:08:40,093 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: partial_history
44
+ 2024-09-01 08:08:40,094 DEBUG SenderThread:1314 [sender.py:send():391] send: telemetry
45
+ 2024-09-01 08:08:40,800 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: status_report
46
+ 2024-09-01 08:08:40,813 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: internal_messages
47
+ 2024-09-01 08:08:41,665 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
48
+ 2024-09-01 08:08:42,978 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: partial_history
49
+ 2024-09-01 08:08:42,979 DEBUG SenderThread:1314 [sender.py:send():391] send: history
50
+ 2024-09-01 08:08:42,979 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: summary_record
51
+ 2024-09-01 08:08:42,980 INFO SenderThread:1314 [sender.py:_save_file():1466] saving file wandb-summary.json with policy end
52
+ 2024-09-01 08:08:43,666 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
53
+ 2024-09-01 08:08:43,666 INFO Thread-12 :1314 [dir_watcher.py:_on_file_created():271] file/dir created: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-summary.json
54
+ 2024-09-01 08:08:45,667 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
55
+ 2024-09-01 08:08:45,808 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: stop_status
56
+ 2024-09-01 08:08:45,808 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: stop_status
57
+ 2024-09-01 08:08:45,879 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: status_report
58
+ 2024-09-01 08:08:45,954 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: partial_history
59
+ 2024-09-01 08:08:45,955 DEBUG SenderThread:1314 [sender.py:send():391] send: history
60
+ 2024-09-01 08:08:45,955 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: summary_record
61
+ 2024-09-01 08:08:45,956 INFO SenderThread:1314 [sender.py:_save_file():1466] saving file wandb-summary.json with policy end
62
+ 2024-09-01 08:08:46,667 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-summary.json
63
+ 2024-09-01 08:08:47,667 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
64
+ 2024-09-01 08:08:48,830 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: partial_history
65
+ 2024-09-01 08:08:48,831 DEBUG SenderThread:1314 [sender.py:send():391] send: history
66
+ 2024-09-01 08:08:48,832 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: summary_record
67
+ 2024-09-01 08:08:48,832 INFO SenderThread:1314 [sender.py:_save_file():1466] saving file wandb-summary.json with policy end
68
+ 2024-09-01 08:08:49,668 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-summary.json
69
+ 2024-09-01 08:08:49,669 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
70
+ 2024-09-01 08:08:50,809 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: internal_messages
71
+ 2024-09-01 08:08:50,894 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: status_report
72
+ 2024-09-01 08:08:51,669 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
73
+ 2024-09-01 08:08:52,333 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: partial_history
74
+ 2024-09-01 08:08:52,334 DEBUG SenderThread:1314 [sender.py:send():391] send: history
75
+ 2024-09-01 08:08:52,334 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: summary_record
76
+ 2024-09-01 08:08:52,334 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: partial_history
77
+ 2024-09-01 08:08:52,335 INFO SenderThread:1314 [sender.py:_save_file():1466] saving file wandb-summary.json with policy end
78
+ 2024-09-01 08:08:52,670 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-summary.json
79
+ 2024-09-01 08:08:53,670 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
80
+ 2024-09-01 08:08:55,120 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: partial_history
81
+ 2024-09-01 08:08:55,121 DEBUG SenderThread:1314 [sender.py:send():391] send: history
82
+ 2024-09-01 08:08:55,122 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: summary_record
83
+ 2024-09-01 08:08:55,122 INFO SenderThread:1314 [sender.py:_save_file():1466] saving file wandb-summary.json with policy end
84
+ 2024-09-01 08:08:55,671 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-summary.json
85
+ 2024-09-01 08:08:55,672 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
86
+ 2024-09-01 08:08:56,810 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: status_report
87
+ 2024-09-01 08:08:57,672 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
88
+ 2024-09-01 08:08:58,009 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: partial_history
89
+ 2024-09-01 08:08:58,010 DEBUG SenderThread:1314 [sender.py:send():391] send: history
90
+ 2024-09-01 08:08:58,011 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: summary_record
91
+ 2024-09-01 08:08:58,012 INFO SenderThread:1314 [sender.py:_save_file():1466] saving file wandb-summary.json with policy end
92
+ 2024-09-01 08:08:58,673 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-summary.json
93
+ 2024-09-01 08:08:59,674 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
94
+ 2024-09-01 08:09:00,808 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: stop_status
95
+ 2024-09-01 08:09:00,809 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: internal_messages
96
+ 2024-09-01 08:09:00,810 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: stop_status
97
+ 2024-09-01 08:09:00,953 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: partial_history
98
+ 2024-09-01 08:09:00,954 DEBUG SenderThread:1314 [sender.py:send():391] send: history
99
+ 2024-09-01 08:09:00,955 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: summary_record
100
+ 2024-09-01 08:09:00,955 INFO SenderThread:1314 [sender.py:_save_file():1466] saving file wandb-summary.json with policy end
101
+ 2024-09-01 08:09:01,674 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-summary.json
102
+ 2024-09-01 08:09:01,675 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
103
+ 2024-09-01 08:09:02,613 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: status_report
104
+ 2024-09-01 08:09:03,675 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/config.yaml
105
+ 2024-09-01 08:09:03,676 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
106
+ 2024-09-01 08:09:03,815 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: partial_history
107
+ 2024-09-01 08:09:03,817 DEBUG SenderThread:1314 [sender.py:send():391] send: history
108
+ 2024-09-01 08:09:03,817 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: summary_record
109
+ 2024-09-01 08:09:03,817 INFO SenderThread:1314 [sender.py:_save_file():1466] saving file wandb-summary.json with policy end
110
+ 2024-09-01 08:09:04,676 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-summary.json
111
+ 2024-09-01 08:09:05,676 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
112
+ 2024-09-01 08:09:07,247 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: partial_history
113
+ 2024-09-01 08:09:07,248 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: partial_history
114
+ 2024-09-01 08:09:07,249 DEBUG SenderThread:1314 [sender.py:send():391] send: history
115
+ 2024-09-01 08:09:07,249 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: summary_record
116
+ 2024-09-01 08:09:07,250 INFO SenderThread:1314 [sender.py:_save_file():1466] saving file wandb-summary.json with policy end
117
+ 2024-09-01 08:09:07,678 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-summary.json
118
+ 2024-09-01 08:09:07,678 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
119
+ 2024-09-01 08:09:08,251 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: status_report
120
+ 2024-09-01 08:09:09,678 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
121
+ 2024-09-01 08:09:10,825 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: internal_messages
122
+ 2024-09-01 08:09:13,826 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: status_report
123
+ 2024-09-01 08:09:15,810 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: stop_status
124
+ 2024-09-01 08:09:15,811 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: stop_status
125
+ 2024-09-01 08:09:18,980 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: status_report
126
+ 2024-09-01 08:09:20,837 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: internal_messages
127
+ 2024-09-01 08:09:24,838 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: status_report
128
+ 2024-09-01 08:09:29,702 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: check_version
129
+ 2024-09-01 08:09:29,703 DEBUG SenderThread:1314 [sender.py:send():391] send: telemetry
130
+ 2024-09-01 08:09:29,703 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: check_version
131
+ 2024-09-01 08:09:29,803 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: server_info
132
+ 2024-09-01 08:09:29,803 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: server_info
133
+ 2024-09-01 08:09:29,870 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: status_report
134
+ 2024-09-01 08:09:29,871 DEBUG SenderThread:1314 [sender.py:send():391] send: exit
135
+ 2024-09-01 08:09:29,872 INFO SenderThread:1314 [sender.py:send_exit():598] handling exit code: 0
136
+ 2024-09-01 08:09:29,872 INFO SenderThread:1314 [sender.py:send_exit():600] handling runtime: 59
137
+ 2024-09-01 08:09:29,874 INFO SenderThread:1314 [sender.py:_save_file():1466] saving file wandb-summary.json with policy end
138
+ 2024-09-01 08:09:29,875 INFO SenderThread:1314 [sender.py:send_exit():606] send defer
139
+ 2024-09-01 08:09:29,875 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
140
+ 2024-09-01 08:09:29,875 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 0
141
+ 2024-09-01 08:09:29,875 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
142
+ 2024-09-01 08:09:29,875 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 0
143
+ 2024-09-01 08:09:29,876 INFO SenderThread:1314 [sender.py:transition_state():626] send defer: 1
144
+ 2024-09-01 08:09:29,876 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
145
+ 2024-09-01 08:09:29,876 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 1
146
+ 2024-09-01 08:09:29,876 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
147
+ 2024-09-01 08:09:29,876 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 1
148
+ 2024-09-01 08:09:29,876 INFO SenderThread:1314 [sender.py:transition_state():626] send defer: 2
149
+ 2024-09-01 08:09:29,876 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
150
+ 2024-09-01 08:09:29,877 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 2
151
+ 2024-09-01 08:09:29,877 INFO HandlerThread:1314 [system_monitor.py:finish():203] Stopping system monitor
152
+ 2024-09-01 08:09:29,877 INFO HandlerThread:1314 [interfaces.py:finish():200] Joined cpu monitor
153
+ 2024-09-01 08:09:29,878 DEBUG SystemMonitor:1314 [system_monitor.py:_start():172] Starting system metrics aggregation loop
154
+ 2024-09-01 08:09:29,878 INFO HandlerThread:1314 [interfaces.py:finish():200] Joined disk monitor
155
+ 2024-09-01 08:09:29,878 DEBUG SystemMonitor:1314 [system_monitor.py:_start():179] Finished system metrics aggregation loop
156
+ 2024-09-01 08:09:29,879 DEBUG SystemMonitor:1314 [system_monitor.py:_start():183] Publishing last batch of metrics
157
+ 2024-09-01 08:09:29,895 INFO HandlerThread:1314 [interfaces.py:finish():200] Joined gpu monitor
158
+ 2024-09-01 08:09:29,895 INFO HandlerThread:1314 [interfaces.py:finish():200] Joined memory monitor
159
+ 2024-09-01 08:09:29,896 INFO HandlerThread:1314 [interfaces.py:finish():200] Joined network monitor
160
+ 2024-09-01 08:09:29,896 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
161
+ 2024-09-01 08:09:29,896 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 2
162
+ 2024-09-01 08:09:29,896 INFO SenderThread:1314 [sender.py:transition_state():626] send defer: 3
163
+ 2024-09-01 08:09:29,896 DEBUG SenderThread:1314 [sender.py:send():391] send: stats
164
+ 2024-09-01 08:09:29,897 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
165
+ 2024-09-01 08:09:29,897 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 3
166
+ 2024-09-01 08:09:29,899 DEBUG SenderThread:1314 [sender.py:send():391] send: history
167
+ 2024-09-01 08:09:29,899 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: summary_record
168
+ 2024-09-01 08:09:29,899 INFO SenderThread:1314 [sender.py:_save_file():1466] saving file wandb-summary.json with policy end
169
+ 2024-09-01 08:09:29,900 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
170
+ 2024-09-01 08:09:29,900 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 3
171
+ 2024-09-01 08:09:29,900 INFO SenderThread:1314 [sender.py:transition_state():626] send defer: 4
172
+ 2024-09-01 08:09:29,900 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
173
+ 2024-09-01 08:09:29,900 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 4
174
+ 2024-09-01 08:09:29,901 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
175
+ 2024-09-01 08:09:29,901 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 4
176
+ 2024-09-01 08:09:29,901 INFO SenderThread:1314 [sender.py:transition_state():626] send defer: 5
177
+ 2024-09-01 08:09:29,901 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
178
+ 2024-09-01 08:09:29,901 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 5
179
+ 2024-09-01 08:09:29,901 DEBUG SenderThread:1314 [sender.py:send():391] send: summary
180
+ 2024-09-01 08:09:29,902 INFO SenderThread:1314 [sender.py:_save_file():1466] saving file wandb-summary.json with policy end
181
+ 2024-09-01 08:09:29,902 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
182
+ 2024-09-01 08:09:29,902 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 5
183
+ 2024-09-01 08:09:29,902 INFO SenderThread:1314 [sender.py:transition_state():626] send defer: 6
184
+ 2024-09-01 08:09:29,902 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
185
+ 2024-09-01 08:09:29,903 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 6
186
+ 2024-09-01 08:09:29,903 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
187
+ 2024-09-01 08:09:29,903 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 6
188
+ 2024-09-01 08:09:29,908 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: status_report
189
+ 2024-09-01 08:09:30,006 INFO SenderThread:1314 [sender.py:transition_state():626] send defer: 7
190
+ 2024-09-01 08:09:30,006 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
191
+ 2024-09-01 08:09:30,006 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 7
192
+ 2024-09-01 08:09:30,006 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
193
+ 2024-09-01 08:09:30,007 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 7
194
+ 2024-09-01 08:09:30,687 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/config.yaml
195
+ 2024-09-01 08:09:30,687 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-summary.json
196
+ 2024-09-01 08:09:30,872 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: poll_exit
197
+ 2024-09-01 08:09:31,156 INFO SenderThread:1314 [sender.py:transition_state():626] send defer: 8
198
+ 2024-09-01 08:09:31,157 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: poll_exit
199
+ 2024-09-01 08:09:31,157 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
200
+ 2024-09-01 08:09:31,157 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 8
201
+ 2024-09-01 08:09:31,158 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
202
+ 2024-09-01 08:09:31,158 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 8
203
+ 2024-09-01 08:09:31,158 INFO SenderThread:1314 [job_builder.py:build():443] Attempting to build job artifact
204
+ 2024-09-01 08:09:31,159 INFO SenderThread:1314 [job_builder.py:_get_source_type():572] is repo sourced job
205
+ 2024-09-01 08:09:31,159 WARNING SenderThread:1314 [job_builder.py:_log_if_verbose():274] No program path found, not creating job artifact. See https://docs.wandb.ai/guides/launch/create-job
206
+ 2024-09-01 08:09:31,159 INFO SenderThread:1314 [sender.py:transition_state():626] send defer: 9
207
+ 2024-09-01 08:09:31,159 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
208
+ 2024-09-01 08:09:31,159 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 9
209
+ 2024-09-01 08:09:31,159 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
210
+ 2024-09-01 08:09:31,160 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 9
211
+ 2024-09-01 08:09:31,160 INFO SenderThread:1314 [dir_watcher.py:finish():358] shutting down directory watcher
212
+ 2024-09-01 08:09:31,687 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
213
+ 2024-09-01 08:09:31,688 INFO SenderThread:1314 [dir_watcher.py:finish():388] scan: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files
214
+ 2024-09-01 08:09:31,688 INFO SenderThread:1314 [dir_watcher.py:finish():402] scan save: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/config.yaml config.yaml
215
+ 2024-09-01 08:09:31,688 INFO SenderThread:1314 [dir_watcher.py:finish():402] scan save: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-metadata.json wandb-metadata.json
216
+ 2024-09-01 08:09:31,688 INFO SenderThread:1314 [dir_watcher.py:finish():402] scan save: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/requirements.txt requirements.txt
217
+ 2024-09-01 08:09:31,691 INFO SenderThread:1314 [dir_watcher.py:finish():402] scan save: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log output.log
218
+ 2024-09-01 08:09:31,694 INFO SenderThread:1314 [dir_watcher.py:finish():402] scan save: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-summary.json wandb-summary.json
219
+ 2024-09-01 08:09:31,696 INFO SenderThread:1314 [sender.py:transition_state():626] send defer: 10
220
+ 2024-09-01 08:09:31,696 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
221
+ 2024-09-01 08:09:31,698 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 10
222
+ 2024-09-01 08:09:31,698 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
223
+ 2024-09-01 08:09:31,698 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 10
224
+ 2024-09-01 08:09:31,698 INFO SenderThread:1314 [file_pusher.py:finish():169] shutting down file pusher
225
+ 2024-09-01 08:09:31,872 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: poll_exit
226
+ 2024-09-01 08:09:31,873 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: poll_exit
227
+ 2024-09-01 08:09:31,928 INFO wandb-upload_0:1314 [upload_job.py:push():130] Uploaded file content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/config.yaml
228
+ 2024-09-01 08:09:32,038 INFO wandb-upload_1:1314 [upload_job.py:push():130] Uploaded file content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/requirements.txt
229
+ 2024-09-01 08:09:32,135 INFO wandb-upload_3:1314 [upload_job.py:push():130] Uploaded file content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-summary.json
230
+ 2024-09-01 08:09:32,170 INFO wandb-upload_2:1314 [upload_job.py:push():130] Uploaded file content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
231
+ 2024-09-01 08:09:32,370 INFO Thread-11 (_thread_body):1314 [sender.py:transition_state():626] send defer: 11
232
+ 2024-09-01 08:09:32,370 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
233
+ 2024-09-01 08:09:32,370 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 11
234
+ 2024-09-01 08:09:32,371 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
235
+ 2024-09-01 08:09:32,371 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 11
236
+ 2024-09-01 08:09:32,371 INFO SenderThread:1314 [file_pusher.py:join():175] waiting for file pusher
237
+ 2024-09-01 08:09:32,372 INFO SenderThread:1314 [sender.py:transition_state():626] send defer: 12
238
+ 2024-09-01 08:09:32,372 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
239
+ 2024-09-01 08:09:32,372 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 12
240
+ 2024-09-01 08:09:32,372 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
241
+ 2024-09-01 08:09:32,373 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 12
242
+ 2024-09-01 08:09:32,373 INFO SenderThread:1314 [file_stream.py:finish():601] file stream finish called
243
+ 2024-09-01 08:09:32,612 INFO SenderThread:1314 [file_stream.py:finish():605] file stream finish is done
244
+ 2024-09-01 08:09:32,612 INFO SenderThread:1314 [sender.py:transition_state():626] send defer: 13
245
+ 2024-09-01 08:09:32,612 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
246
+ 2024-09-01 08:09:32,612 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 13
247
+ 2024-09-01 08:09:32,613 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
248
+ 2024-09-01 08:09:32,613 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 13
249
+ 2024-09-01 08:09:32,613 INFO SenderThread:1314 [sender.py:transition_state():626] send defer: 14
250
+ 2024-09-01 08:09:32,613 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
251
+ 2024-09-01 08:09:32,613 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 14
252
+ 2024-09-01 08:09:32,613 DEBUG SenderThread:1314 [sender.py:send():391] send: final
253
+ 2024-09-01 08:09:32,614 DEBUG SenderThread:1314 [sender.py:send():391] send: footer
254
+ 2024-09-01 08:09:32,614 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
255
+ 2024-09-01 08:09:32,614 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 14
256
+ 2024-09-01 08:09:32,615 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: poll_exit
257
+ 2024-09-01 08:09:32,615 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: poll_exit
258
+ 2024-09-01 08:09:32,616 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: internal_messages
259
+ 2024-09-01 08:09:32,616 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: get_summary
260
+ 2024-09-01 08:09:32,617 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: sampled_history
261
+ 2024-09-01 08:09:32,618 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: shutdown
262
+ 2024-09-01 08:09:32,618 INFO HandlerThread:1314 [handler.py:finish():884] shutting down handler
263
+ 2024-09-01 08:09:33,615 INFO WriterThread:1314 [datastore.py:close():296] close: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/run-ra4ttuu9.wandb
264
+ 2024-09-01 08:09:33,615 INFO SenderThread:1314 [sender.py:finish():1631] shutting down sender
265
+ 2024-09-01 08:09:33,616 INFO SenderThread:1314 [file_pusher.py:finish():169] shutting down file pusher
266
+ 2024-09-01 08:09:33,616 INFO SenderThread:1314 [file_pusher.py:join():175] waiting for file pusher
wandb/debug.log ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-09-01 08:08:30,428 INFO MainThread:1234 [wandb_setup.py:_flush():77] Current SDK version is 0.17.8
2
+ 2024-09-01 08:08:30,428 INFO MainThread:1234 [wandb_setup.py:_flush():77] Configure stats pid to 1234
3
+ 2024-09-01 08:08:30,428 INFO MainThread:1234 [wandb_setup.py:_flush():77] Loading settings from /root/.config/wandb/settings
4
+ 2024-09-01 08:08:30,428 INFO MainThread:1234 [wandb_setup.py:_flush():77] Loading settings from /workspace/mistral-finetune/wandb/settings
5
+ 2024-09-01 08:08:30,428 INFO MainThread:1234 [wandb_setup.py:_flush():77] Loading settings from environment variables: {}
6
+ 2024-09-01 08:08:30,428 WARNING MainThread:1234 [wandb_setup.py:_flush():77] Could not find program at -m train
7
+ 2024-09-01 08:08:30,428 INFO MainThread:1234 [wandb_setup.py:_flush():77] Inferring run settings from compute environment: {'program_relpath': None, 'program': '-m train'}
8
+ 2024-09-01 08:08:30,428 INFO MainThread:1234 [wandb_setup.py:_flush():77] Applying login settings: {'api_key': '***REDACTED***'}
9
+ 2024-09-01 08:08:30,428 INFO MainThread:1234 [wandb_setup.py:_flush():77] Applying login settings: {'api_key': '***REDACTED***'}
10
+ 2024-09-01 08:08:30,428 INFO MainThread:1234 [wandb_setup.py:_flush():77] Applying login settings: {}
11
+ 2024-09-01 08:08:30,428 INFO MainThread:1234 [wandb_init.py:_log_setup():524] Logging user logs to content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/logs/debug.log
12
+ 2024-09-01 08:08:30,428 INFO MainThread:1234 [wandb_init.py:_log_setup():525] Logging internal logs to content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/logs/debug-internal.log
13
+ 2024-09-01 08:08:30,428 INFO MainThread:1234 [wandb_init.py:init():607] calling init triggers
14
+ 2024-09-01 08:08:30,429 INFO MainThread:1234 [wandb_init.py:init():614] wandb.init called with sweep_config: {}
15
+ config: {'data': {'data': '', 'shuffle': False, 'instruct_data': './data/t_train.jsonl', 'eval_instruct_data': './data/t_eval.jsonl', 'instruct': {'shuffle': True, 'dynamic_chunk_fn_call': True}}, 'model_id_or_path': '../downloaded_model/mistral_models/7B-v0.3/', 'run_dir': './content/test_ultra_1', 'optim': {'lr': 0.0001, 'weight_decay': 0.1, 'pct_start': 0.05}, 'seed': 0, 'num_microbatches': 4, 'seq_len': 1024, 'batch_size': 1, 'max_norm': 1.0, 'max_steps': 10, 'log_freq': 1, 'ckpt_freq': 10, 'save_adapters': False, 'no_ckpt': False, 'num_ckpt_keep': 3, 'eval_freq': 5, 'no_eval': False, 'checkpoint': True, 'world_size': 1, 'wandb': {'project': 'finetuning_mistral', 'offline': False, 'key': '9e930bac040e715c4be9de064318956067b45479', 'run_name': None}, 'mlflow': {'tracking_uri': None, 'experiment_name': None}, 'lora': {'enable': True, 'rank': 16, 'dropout': 0.05, 'scaling': 2.0}}
16
+ 2024-09-01 08:08:30,429 INFO MainThread:1234 [wandb_init.py:init():657] starting backend
17
+ 2024-09-01 08:08:30,429 INFO MainThread:1234 [wandb_init.py:init():661] setting up manager
18
+ 2024-09-01 08:08:30,431 INFO MainThread:1234 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
19
+ 2024-09-01 08:08:30,432 INFO MainThread:1234 [wandb_init.py:init():669] backend started and connected
20
+ 2024-09-01 08:08:30,436 INFO MainThread:1234 [wandb_init.py:init():767] updated telemetry
21
+ 2024-09-01 08:08:30,441 INFO MainThread:1234 [wandb_init.py:init():800] communicating run to backend with 90.0 second timeout
22
+ 2024-09-01 08:08:30,665 INFO MainThread:1234 [wandb_init.py:init():851] starting run threads in backend
23
+ 2024-09-01 08:08:30,808 INFO MainThread:1234 [wandb_run.py:_console_start():2463] atexit reg
24
+ 2024-09-01 08:08:30,808 INFO MainThread:1234 [wandb_run.py:_redirect():2309] redirect: wrap_raw
25
+ 2024-09-01 08:08:30,808 INFO MainThread:1234 [wandb_run.py:_redirect():2374] Wrapping output streams.
26
+ 2024-09-01 08:08:30,808 INFO MainThread:1234 [wandb_run.py:_redirect():2399] Redirects installed.
27
+ 2024-09-01 08:08:30,810 INFO MainThread:1234 [wandb_init.py:init():894] run started, returning control to user process
28
+ 2024-09-01 08:09:29,670 INFO MainThread:1234 [wandb_run.py:_finish():2160] finishing run dwivedi-rishabh95-avizva/finetuning_mistral/ra4ttuu9
29
+ 2024-09-01 08:09:29,671 INFO MainThread:1234 [wandb_run.py:_atexit_cleanup():2424] got exitcode: 0
30
+ 2024-09-01 08:09:29,671 INFO MainThread:1234 [wandb_run.py:_restore():2406] restore
31
+ 2024-09-01 08:09:29,671 INFO MainThread:1234 [wandb_run.py:_restore():2412] restore done
32
+ 2024-09-01 08:09:29,671 INFO MainThread:1234 [wandb_run.py:_on_finish():2677] communicating current version
33
+ 2024-09-01 08:09:29,802 INFO MainThread:1234 [wandb_run.py:_on_finish():2686] got version response
34
+ 2024-09-01 08:09:33,620 INFO MainThread:1234 [wandb_run.py:_footer_history_summary_info():4078] rendering history
35
+ 2024-09-01 08:09:33,620 INFO MainThread:1234 [wandb_run.py:_footer_history_summary_info():4110] rendering summary
36
+ 2024-09-01 08:09:33,629 INFO MainThread:1234 [wandb_run.py:_footer_sync_info():4037] logging synced files
wandb/run-20240901_080830-ra4ttuu9/files/config.yaml ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ data:
4
+ desc: null
5
+ value:
6
+ data: ''
7
+ shuffle: false
8
+ instruct_data: ./data/t_train.jsonl
9
+ eval_instruct_data: ./data/t_eval.jsonl
10
+ instruct:
11
+ shuffle: true
12
+ dynamic_chunk_fn_call: true
13
+ model_id_or_path:
14
+ desc: null
15
+ value: ../downloaded_model/mistral_models/7B-v0.3/
16
+ run_dir:
17
+ desc: null
18
+ value: ./content/test_ultra_1
19
+ optim:
20
+ desc: null
21
+ value:
22
+ lr: 0.0001
23
+ weight_decay: 0.1
24
+ pct_start: 0.05
25
+ seed:
26
+ desc: null
27
+ value: 0
28
+ num_microbatches:
29
+ desc: null
30
+ value: 4
31
+ seq_len:
32
+ desc: null
33
+ value: 1024
34
+ batch_size:
35
+ desc: null
36
+ value: 1
37
+ max_norm:
38
+ desc: null
39
+ value: 1.0
40
+ max_steps:
41
+ desc: null
42
+ value: 10
43
+ log_freq:
44
+ desc: null
45
+ value: 1
46
+ ckpt_freq:
47
+ desc: null
48
+ value: 10
49
+ save_adapters:
50
+ desc: null
51
+ value: false
52
+ no_ckpt:
53
+ desc: null
54
+ value: false
55
+ num_ckpt_keep:
56
+ desc: null
57
+ value: 3
58
+ eval_freq:
59
+ desc: null
60
+ value: 5
61
+ no_eval:
62
+ desc: null
63
+ value: false
64
+ checkpoint:
65
+ desc: null
66
+ value: true
67
+ world_size:
68
+ desc: null
69
+ value: 1
70
+ wandb:
71
+ desc: null
72
+ value:
73
+ project: finetuning_mistral
74
+ offline: false
75
+ key: 9e930bac040e715c4be9de064318956067b45479
76
+ run_name: null
77
+ mlflow:
78
+ desc: null
79
+ value:
80
+ tracking_uri: null
81
+ experiment_name: null
82
+ lora:
83
+ desc: null
84
+ value:
85
+ enable: true
86
+ rank: 16
87
+ dropout: 0.05
88
+ scaling: 2.0
89
+ _wandb:
90
+ desc: null
91
+ value:
92
+ python_version: 3.10.12
93
+ cli_version: 0.17.8
94
+ framework: torch
95
+ is_jupyter_run: false
96
+ is_kaggle_kernel: false
97
+ start_time: 1725178110
98
+ t:
99
+ 1:
100
+ - 1
101
+ - 55
102
+ 2:
103
+ - 1
104
+ - 55
105
+ 3:
106
+ - 2
107
+ - 13
108
+ - 16
109
+ - 23
110
+ - 61
111
+ 4: 3.10.12
112
+ 5: 0.17.8
113
+ 8:
114
+ - 5
115
+ 13: linux-x86_64
wandb/run-20240901_080830-ra4ttuu9/files/output.log ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ wandb: WARNING Calling wandb.login() after wandb.init() has no effect.
3
+ 2024-09-01 08:08:30 (UTC) - 0:00:04 - finetune.wrapped_model - INFO - Reloading model from ../downloaded_model/mistral_models/7B-v0.3/consolidated.safetensors ...
4
+ 2024-09-01 08:08:30 (UTC) - 0:00:04 - finetune.wrapped_model - INFO - Converting model to dtype torch.bfloat16 ...
5
+ 2024-09-01 08:08:30 (UTC) - 0:00:04 - finetune.wrapped_model - INFO - Loaded model on cpu!
6
+ 2024-09-01 08:08:30 (UTC) - 0:00:04 - finetune.wrapped_model - INFO - Initializing lora layers ...
7
+ 2024-09-01 08:08:32 (UTC) - 0:00:05 - finetune.wrapped_model - INFO - Finished initialization!
8
+ 2024-09-01 08:08:32 (UTC) - 0:00:05 - finetune.wrapped_model - INFO - Sharding model over 1 GPUs ...
9
+ 2024-09-01 08:08:35 (UTC) - 0:00:08 - finetune.wrapped_model - INFO - Model sharded!
10
+ 2024-09-01 08:08:35 (UTC) - 0:00:08 - finetune.wrapped_model - INFO - 41,943,040 out of 7,289,966,592 parameters are finetuned (0.58%).
11
+ 2024-09-01 08:08:36 (UTC) - 0:00:09 - dataset - INFO - Loading data/t_train.jsonl ...
12
+ 2024-09-01 08:08:36 (UTC) - 0:00:09 - dataset - INFO - data/t_train.jsonl loaded and tokenized.
13
+ 2024-09-01 08:08:36 (UTC) - 0:00:09 - dataset - INFO - Shuffling data/t_train.jsonl ...
14
+ 2024-09-01 08:08:38 (UTC) - 0:00:11 - dataset - INFO - Shuffling data/t_train.jsonl ...
15
+ 2024-09-01 08:08:40 (UTC) - 0:00:13 - train - INFO - step: 000001 - done (%): 10.0 - loss: 1.135 - lr: 9.9e-05 - peak_alloc_mem (GB): 14.7 - alloc_mem (GB): 14.3 - words_per_second: 1177.8 - avg_words_per_second: 1177.8 - ETA: >2024-09-01 08:09:11
16
+ 2024-09-01 08:08:40 (UTC) - 0:00:13 - dataset - INFO - Shuffling data/t_train.jsonl ...
17
+ 2024-09-01 08:08:40 (UTC) - 0:00:14 - dataset - INFO - Shuffling data/t_train.jsonl ...
18
+ 2024-09-01 08:08:42 (UTC) - 0:00:15 - dataset - INFO - Shuffling data/t_train.jsonl ...
19
+ 2024-09-01 08:08:42 (UTC) - 0:00:16 - train - INFO - step: 000002 - done (%): 20.0 - loss: 0.882 - lr: 9.4e-05 - peak_alloc_mem (GB): 15.0 - alloc_mem (GB): 14.3 - words_per_second: 1420.5 - avg_words_per_second: 1287.8 - ETA: >2024-09-01 08:09:08
20
+ 2024-09-01 08:08:43 (UTC) - 0:00:17 - dataset - INFO - Shuffling data/t_train.jsonl ...
21
+ 2024-09-01 08:08:45 (UTC) - 0:00:18 - dataset - INFO - Shuffling data/t_train.jsonl ...
22
+ 2024-09-01 08:08:45 (UTC) - 0:00:19 - train - INFO - step: 000003 - done (%): 30.0 - loss: 0.757 - lr: 8.4e-05 - peak_alloc_mem (GB): 15.0 - alloc_mem (GB): 14.3 - words_per_second: 1376.9 - avg_words_per_second: 1316.2 - ETA: >2024-09-01 08:09:07
23
+ 2024-09-01 08:08:46 (UTC) - 0:00:19 - dataset - INFO - Shuffling data/t_train.jsonl ...
24
+ 2024-09-01 08:08:48 (UTC) - 0:00:21 - dataset - INFO - Shuffling data/t_train.jsonl ...
25
+ 2024-09-01 08:08:48 (UTC) - 0:00:22 - train - INFO - step: 000004 - done (%): 40.0 - loss: 0.465 - lr: 7.0e-05 - peak_alloc_mem (GB): 15.0 - alloc_mem (GB): 14.3 - words_per_second: 1424.8 - avg_words_per_second: 1341.8 - ETA: >2024-09-01 08:09:07
26
+ 2024-09-01 08:08:49 (UTC) - 0:00:22 - dataset - INFO - Shuffling data/t_train.jsonl ...
27
+ 2024-09-01 08:08:50 (UTC) - 0:00:24 - dataset - INFO - Shuffling data/t_train.jsonl ...
28
+ 2024-09-01 08:08:51 (UTC) - 0:00:24 - eval - INFO - Start eval...
29
+ 2024-09-01 08:08:52 (UTC) - 0:00:25 - eval - INFO - Eval finished!
30
+ 2024-09-01 08:08:52 (UTC) - 0:00:25 - train - INFO - step: 000005 - eval_perplexity: 1.260 - eval_loss: 0.334 - train_loss: 0.306
31
+ 2024-09-01 08:08:52 (UTC) - 0:00:25 - train - INFO - step: 000005 - done (%): 50.0 - loss: 0.306 - lr: 5.4e-05 - peak_alloc_mem (GB): 15.0 - alloc_mem (GB): 14.3 - words_per_second: 1169.4 - avg_words_per_second: 1303.4 - ETA: >2024-09-01 08:09:08
32
+ 2024-09-01 08:08:53 (UTC) - 0:00:26 - dataset - INFO - Shuffling data/t_train.jsonl ...
33
+ 2024-09-01 08:08:54 (UTC) - 0:00:27 - dataset - INFO - Shuffling data/t_train.jsonl ...
34
+ 2024-09-01 08:08:55 (UTC) - 0:00:28 - train - INFO - step: 000006 - done (%): 60.0 - loss: 0.225 - lr: 3.8e-05 - peak_alloc_mem (GB): 15.0 - alloc_mem (GB): 14.3 - words_per_second: 1470.7 - avg_words_per_second: 1328.6 - ETA: >2024-09-01 08:09:07
35
+ 2024-09-01 08:08:55 (UTC) - 0:00:29 - dataset - INFO - Shuffling data/t_train.jsonl ...
36
+ 2024-09-01 08:08:57 (UTC) - 0:00:30 - dataset - INFO - Shuffling data/t_train.jsonl ...
37
+ 2024-09-01 08:08:58 (UTC) - 0:00:31 - train - INFO - step: 000007 - done (%): 70.0 - loss: 0.164 - lr: 2.3e-05 - peak_alloc_mem (GB): 15.0 - alloc_mem (GB): 14.3 - words_per_second: 1418.2 - avg_words_per_second: 1340.7 - ETA: >2024-09-01 08:09:07
38
+ 2024-09-01 08:08:58 (UTC) - 0:00:31 - dataset - INFO - Shuffling data/t_train.jsonl ...
39
+ 2024-09-01 08:09:00 (UTC) - 0:00:33 - dataset - INFO - Shuffling data/t_train.jsonl ...
40
+ 2024-09-01 08:09:00 (UTC) - 0:00:34 - train - INFO - step: 000008 - done (%): 80.0 - loss: 0.130 - lr: 1.1e-05 - peak_alloc_mem (GB): 15.0 - alloc_mem (GB): 14.3 - words_per_second: 1411.1 - avg_words_per_second: 1349.1 - ETA: >2024-09-01 08:09:06
41
+ 2024-09-01 08:09:01 (UTC) - 0:00:34 - dataset - INFO - Shuffling data/t_train.jsonl ...
42
+ 2024-09-01 08:09:02 (UTC) - 0:00:36 - dataset - INFO - Shuffling data/t_train.jsonl ...
43
+ 2024-09-01 08:09:03 (UTC) - 0:00:37 - train - INFO - step: 000009 - done (%): 90.0 - loss: 0.152 - lr: 2.7e-06 - peak_alloc_mem (GB): 15.0 - alloc_mem (GB): 14.3 - words_per_second: 1412.0 - avg_words_per_second: 1355.8 - ETA: >2024-09-01 08:09:06
44
+ 2024-09-01 08:09:04 (UTC) - 0:00:37 - dataset - INFO - Shuffling data/t_train.jsonl ...
45
+ 2024-09-01 08:09:05 (UTC) - 0:00:39 - dataset - INFO - Shuffling data/t_train.jsonl ...
46
+ 2024-09-01 08:09:06 (UTC) - 0:00:39 - eval - INFO - Start eval...
47
+ 2024-09-01 08:09:07 (UTC) - 0:00:40 - eval - INFO - Eval finished!
48
+ 2024-09-01 08:09:07 (UTC) - 0:00:40 - train - INFO - step: 000010 - eval_perplexity: 1.134 - eval_loss: 0.182 - train_loss: 0.108
49
+ 2024-09-01 08:09:07 (UTC) - 0:00:40 - train - INFO - step: 000010 - done (%): 100.0 - loss: 0.108 - lr: 4.0e-10 - peak_alloc_mem (GB): 15.0 - alloc_mem (GB): 14.3 - words_per_second: 1193.5 - avg_words_per_second: 1337.6 - ETA: >2024-09-01 08:09:07
50
+ 2024-09-01 08:09:07 (UTC) - 0:00:40 - checkpointing - INFO - Dumping checkpoint in content/test_ultra_1/checkpoints/checkpoint_000010/consolidated using tmp name: tmp.consolidated
51
+ 2024-09-01 08:09:29 (UTC) - 0:01:02 - checkpointing - INFO - Done dumping checkpoint in content/test_ultra_1/checkpoints/checkpoint_000010/consolidated for step: 10
52
+ 2024-09-01 08:09:29 (UTC) - 0:01:02 - checkpointing - INFO - Done deleting checkpoints
53
+ 2024-09-01 08:09:29 (UTC) - 0:01:02 - checkpointing - INFO - Done!
54
+ 2024-09-01 08:09:29 (UTC) - 0:01:02 - train - INFO - done!
55
+ 2024-09-01 08:09:29 (UTC) - 0:01:02 - utils - INFO - Closing: eval_logger
wandb/run-20240901_080830-ra4ttuu9/files/requirements.txt ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Babel==2.13.1
2
+ GitPython==3.1.43
3
+ Jinja2==3.1.2
4
+ Markdown==3.7
5
+ MarkupSafe==2.1.3
6
+ Pillow==10.1.0
7
+ PyGObject==3.42.1
8
+ PyJWT==2.3.0
9
+ PyYAML==6.0.1
10
+ Pygments==2.17.2
11
+ SecretStorage==3.3.1
12
+ Send2Trash==1.8.2
13
+ Werkzeug==3.0.4
14
+ absl-py==2.1.0
15
+ annotated-types==0.7.0
16
+ anyio==4.1.0
17
+ argon2-cffi-bindings==21.2.0
18
+ argon2-cffi==23.1.0
19
+ arrow==1.3.0
20
+ asttokens==2.4.1
21
+ async-lru==2.0.4
22
+ attrs==23.1.0
23
+ beautifulsoup4==4.12.2
24
+ bleach==6.1.0
25
+ blinker==1.4
26
+ certifi==2023.11.17
27
+ cffi==1.16.0
28
+ charset-normalizer==3.3.2
29
+ click==8.1.7
30
+ comm==0.2.0
31
+ cryptography==3.4.8
32
+ dbus-python==1.2.18
33
+ debugpy==1.8.0
34
+ decorator==5.1.1
35
+ defusedxml==0.7.1
36
+ distro==1.7.0
37
+ docker-pycreds==0.4.0
38
+ docstring_parser==0.16
39
+ entrypoints==0.4
40
+ et-xmlfile==1.1.0
41
+ exceptiongroup==1.2.0
42
+ executing==2.0.1
43
+ fastjsonschema==2.19.0
44
+ filelock==3.13.1
45
+ fire==0.6.0
46
+ fqdn==1.5.1
47
+ fsspec==2023.10.0
48
+ gitdb==4.0.11
49
+ grpcio==1.66.1
50
+ httplib2==0.20.2
51
+ huggingface-hub==0.24.6
52
+ idna==3.6
53
+ importlib-metadata==4.6.4
54
+ ipykernel==6.26.0
55
+ ipython-genutils==0.2.0
56
+ ipython==8.18.1
57
+ ipywidgets==8.1.1
58
+ isoduration==20.11.0
59
+ jedi==0.19.1
60
+ jeepney==0.7.1
61
+ json5==0.9.14
62
+ jsonpointer==2.4
63
+ jsonschema-specifications==2023.11.1
64
+ jsonschema==4.23.0
65
+ jupyter-archive==3.4.0
66
+ jupyter-contrib-core==0.4.2
67
+ jupyter-contrib-nbextensions==0.7.0
68
+ jupyter-events==0.9.0
69
+ jupyter-highlight-selected-word==0.2.0
70
+ jupyter-lsp==2.2.1
71
+ jupyter-nbextensions-configurator==0.6.3
72
+ jupyter_client==7.4.9
73
+ jupyter_core==5.5.0
74
+ jupyter_server==2.10.1
75
+ jupyter_server_terminals==0.4.4
76
+ jupyterlab-widgets==3.0.9
77
+ jupyterlab==4.0.9
78
+ jupyterlab_pygments==0.3.0
79
+ jupyterlab_server==2.25.2
80
+ keyring==23.5.0
81
+ launchpadlib==1.10.16
82
+ lazr.restfulclient==0.14.4
83
+ lazr.uri==1.0.6
84
+ lxml==4.9.3
85
+ matplotlib-inline==0.1.6
86
+ mistral_common==1.3.4
87
+ mistune==3.0.2
88
+ more-itertools==8.10.0
89
+ mpmath==1.3.0
90
+ nbclassic==1.0.0
91
+ nbclient==0.9.0
92
+ nbconvert==7.11.0
93
+ nbformat==5.9.2
94
+ nest-asyncio==1.5.8
95
+ networkx==3.2.1
96
+ notebook==6.5.5
97
+ notebook_shim==0.2.3
98
+ numpy==1.26.2
99
+ nvidia-cublas-cu12==12.1.3.1
100
+ nvidia-cuda-cupti-cu12==12.1.105
101
+ nvidia-cuda-nvrtc-cu12==12.1.105
102
+ nvidia-cuda-runtime-cu12==12.1.105
103
+ nvidia-cudnn-cu12==8.9.2.26
104
+ nvidia-cufft-cu12==11.0.2.54
105
+ nvidia-curand-cu12==10.3.2.106
106
+ nvidia-cusolver-cu12==11.4.5.107
107
+ nvidia-cusparse-cu12==12.1.0.106
108
+ nvidia-nccl-cu12==2.19.3
109
+ nvidia-nvjitlink-cu12==12.3.101
110
+ nvidia-nvtx-cu12==12.1.105
111
+ oauthlib==3.2.0
112
+ openpyxl==3.1.5
113
+ overrides==7.4.0
114
+ packaging==23.2
115
+ pandas==2.2.2
116
+ pandocfilters==1.5.0
117
+ parso==0.8.3
118
+ pexpect==4.9.0
119
+ pip==23.3.1
120
+ platformdirs==4.0.0
121
+ prometheus-client==0.19.0
122
+ prompt-toolkit==3.0.41
123
+ protobuf==5.28.0
124
+ psutil==5.9.6
125
+ ptyprocess==0.7.0
126
+ pure-eval==0.2.2
127
+ pycparser==2.21
128
+ pydantic==2.8.2
129
+ pydantic_core==2.20.1
130
+ pyparsing==2.4.7
131
+ python-apt==2.4.0+ubuntu2
132
+ python-dateutil==2.8.2
133
+ python-json-logger==2.0.7
134
+ pytz==2024.1
135
+ pyzmq==24.0.1
136
+ referencing==0.31.0
137
+ regex==2024.7.24
138
+ requests==2.31.0
139
+ rfc3339-validator==0.1.4
140
+ rfc3986-validator==0.1.1
141
+ rpds-py==0.13.1
142
+ safetensors==0.4.4
143
+ sentencepiece==0.2.0
144
+ sentry-sdk==2.13.0
145
+ setproctitle==1.3.3
146
+ setuptools==69.0.2
147
+ simple_parsing==0.1.5
148
+ six==1.16.0
149
+ smmap==5.0.1
150
+ sniffio==1.3.0
151
+ soupsieve==2.5
152
+ stack-data==0.6.3
153
+ sympy==1.12
154
+ tensorboard-data-server==0.7.2
155
+ tensorboard==2.17.1
156
+ termcolor==2.4.0
157
+ terminado==0.18.0
158
+ tiktoken==0.7.0
159
+ tinycss2==1.2.1
160
+ tomli==2.0.1
161
+ torch==2.2.0
162
+ torchaudio==2.1.1
163
+ torchvision==0.16.1
164
+ tornado==6.3.3
165
+ tqdm==4.66.5
166
+ traitlets==5.13.0
167
+ triton==2.2.0
168
+ types-python-dateutil==2.8.19.14
169
+ typing_extensions==4.12.2
170
+ tzdata==2024.1
171
+ uri-template==1.3.0
172
+ urllib3==2.1.0
173
+ wadllib==1.3.6
174
+ wandb==0.17.8
175
+ wcwidth==0.2.12
176
+ webcolors==1.13
177
+ webencodings==0.5.1
178
+ websocket-client==1.6.4
179
+ wheel==0.42.0
180
+ widgetsnbextension==4.0.9
181
+ xformers==0.0.24
182
+ zipp==1.0.0
wandb/run-20240901_080830-ra4ttuu9/files/wandb-metadata.json ADDED
@@ -0,0 +1,369 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.5.0-35-generic-x86_64-with-glibc2.35",
3
+ "python": "3.10.12",
4
+ "heartbeatAt": "2024-09-01T08:08:30.709894",
5
+ "startedAt": "2024-09-01T08:08:30.425931",
6
+ "docker": null,
7
+ "cuda": null,
8
+ "args": [
9
+ "example.yaml"
10
+ ],
11
+ "state": "running",
12
+ "program": "-m train",
13
+ "codePathLocal": null,
14
+ "git": {
15
+ "remote": "https://github.com/mistralai/mistral-finetune.git",
16
+ "commit": "0b0eaac415ba6317e198aa2207c7bd4b3973adbe"
17
+ },
18
+ "email": null,
19
+ "root": "/workspace/mistral-finetune",
20
+ "host": "f6e183d76cb1",
21
+ "username": "root",
22
+ "executable": "/usr/bin/python",
23
+ "cpu_count": 32,
24
+ "cpu_count_logical": 64,
25
+ "cpu_freq": {
26
+ "current": 1976.101125,
27
+ "min": 1500.0,
28
+ "max": 3200.0
29
+ },
30
+ "cpu_freq_per_core": [
31
+ {
32
+ "current": 1500.0,
33
+ "min": 1500.0,
34
+ "max": 3200.0
35
+ },
36
+ {
37
+ "current": 1500.0,
38
+ "min": 1500.0,
39
+ "max": 3200.0
40
+ },
41
+ {
42
+ "current": 1500.0,
43
+ "min": 1500.0,
44
+ "max": 3200.0
45
+ },
46
+ {
47
+ "current": 1500.0,
48
+ "min": 1500.0,
49
+ "max": 3200.0
50
+ },
51
+ {
52
+ "current": 3925.001,
53
+ "min": 1500.0,
54
+ "max": 3200.0
55
+ },
56
+ {
57
+ "current": 1500.0,
58
+ "min": 1500.0,
59
+ "max": 3200.0
60
+ },
61
+ {
62
+ "current": 1500.0,
63
+ "min": 1500.0,
64
+ "max": 3200.0
65
+ },
66
+ {
67
+ "current": 2300.0,
68
+ "min": 1500.0,
69
+ "max": 3200.0
70
+ },
71
+ {
72
+ "current": 1500.0,
73
+ "min": 1500.0,
74
+ "max": 3200.0
75
+ },
76
+ {
77
+ "current": 3200.0,
78
+ "min": 1500.0,
79
+ "max": 3200.0
80
+ },
81
+ {
82
+ "current": 1500.0,
83
+ "min": 1500.0,
84
+ "max": 3200.0
85
+ },
86
+ {
87
+ "current": 2773.255,
88
+ "min": 1500.0,
89
+ "max": 3200.0
90
+ },
91
+ {
92
+ "current": 1500.0,
93
+ "min": 1500.0,
94
+ "max": 3200.0
95
+ },
96
+ {
97
+ "current": 1500.0,
98
+ "min": 1500.0,
99
+ "max": 3200.0
100
+ },
101
+ {
102
+ "current": 3200.0,
103
+ "min": 1500.0,
104
+ "max": 3200.0
105
+ },
106
+ {
107
+ "current": 1500.0,
108
+ "min": 1500.0,
109
+ "max": 3200.0
110
+ },
111
+ {
112
+ "current": 1500.0,
113
+ "min": 1500.0,
114
+ "max": 3200.0
115
+ },
116
+ {
117
+ "current": 1500.0,
118
+ "min": 1500.0,
119
+ "max": 3200.0
120
+ },
121
+ {
122
+ "current": 1500.0,
123
+ "min": 1500.0,
124
+ "max": 3200.0
125
+ },
126
+ {
127
+ "current": 1500.0,
128
+ "min": 1500.0,
129
+ "max": 3200.0
130
+ },
131
+ {
132
+ "current": 1500.0,
133
+ "min": 1500.0,
134
+ "max": 3200.0
135
+ },
136
+ {
137
+ "current": 1500.0,
138
+ "min": 1500.0,
139
+ "max": 3200.0
140
+ },
141
+ {
142
+ "current": 1500.0,
143
+ "min": 1500.0,
144
+ "max": 3200.0
145
+ },
146
+ {
147
+ "current": 1500.0,
148
+ "min": 1500.0,
149
+ "max": 3200.0
150
+ },
151
+ {
152
+ "current": 2221.478,
153
+ "min": 1500.0,
154
+ "max": 3200.0
155
+ },
156
+ {
157
+ "current": 2062.966,
158
+ "min": 1500.0,
159
+ "max": 3200.0
160
+ },
161
+ {
162
+ "current": 3200.0,
163
+ "min": 1500.0,
164
+ "max": 3200.0
165
+ },
166
+ {
167
+ "current": 2111.787,
168
+ "min": 1500.0,
169
+ "max": 3200.0
170
+ },
171
+ {
172
+ "current": 3925.001,
173
+ "min": 1500.0,
174
+ "max": 3200.0
175
+ },
176
+ {
177
+ "current": 3137.781,
178
+ "min": 1500.0,
179
+ "max": 3200.0
180
+ },
181
+ {
182
+ "current": 1500.0,
183
+ "min": 1500.0,
184
+ "max": 3200.0
185
+ },
186
+ {
187
+ "current": 1500.0,
188
+ "min": 1500.0,
189
+ "max": 3200.0
190
+ },
191
+ {
192
+ "current": 3200.0,
193
+ "min": 1500.0,
194
+ "max": 3200.0
195
+ },
196
+ {
197
+ "current": 1500.0,
198
+ "min": 1500.0,
199
+ "max": 3200.0
200
+ },
201
+ {
202
+ "current": 1499.733,
203
+ "min": 1500.0,
204
+ "max": 3200.0
205
+ },
206
+ {
207
+ "current": 2276.722,
208
+ "min": 1500.0,
209
+ "max": 3200.0
210
+ },
211
+ {
212
+ "current": 1500.0,
213
+ "min": 1500.0,
214
+ "max": 3200.0
215
+ },
216
+ {
217
+ "current": 1500.0,
218
+ "min": 1500.0,
219
+ "max": 3200.0
220
+ },
221
+ {
222
+ "current": 1500.0,
223
+ "min": 1500.0,
224
+ "max": 3200.0
225
+ },
226
+ {
227
+ "current": 1500.0,
228
+ "min": 1500.0,
229
+ "max": 3200.0
230
+ },
231
+ {
232
+ "current": 1606.878,
233
+ "min": 1500.0,
234
+ "max": 3200.0
235
+ },
236
+ {
237
+ "current": 3120.917,
238
+ "min": 1500.0,
239
+ "max": 3200.0
240
+ },
241
+ {
242
+ "current": 1500.0,
243
+ "min": 1500.0,
244
+ "max": 3200.0
245
+ },
246
+ {
247
+ "current": 1500.0,
248
+ "min": 1500.0,
249
+ "max": 3200.0
250
+ },
251
+ {
252
+ "current": 1500.0,
253
+ "min": 1500.0,
254
+ "max": 3200.0
255
+ },
256
+ {
257
+ "current": 1500.0,
258
+ "min": 1500.0,
259
+ "max": 3200.0
260
+ },
261
+ {
262
+ "current": 3924.981,
263
+ "min": 1500.0,
264
+ "max": 3200.0
265
+ },
266
+ {
267
+ "current": 1500.0,
268
+ "min": 1500.0,
269
+ "max": 3200.0
270
+ },
271
+ {
272
+ "current": 1500.0,
273
+ "min": 1500.0,
274
+ "max": 3200.0
275
+ },
276
+ {
277
+ "current": 1500.0,
278
+ "min": 1500.0,
279
+ "max": 3200.0
280
+ },
281
+ {
282
+ "current": 1916.093,
283
+ "min": 1500.0,
284
+ "max": 3200.0
285
+ },
286
+ {
287
+ "current": 1500.0,
288
+ "min": 1500.0,
289
+ "max": 3200.0
290
+ },
291
+ {
292
+ "current": 3924.985,
293
+ "min": 1500.0,
294
+ "max": 3200.0
295
+ },
296
+ {
297
+ "current": 3200.0,
298
+ "min": 1500.0,
299
+ "max": 3200.0
300
+ },
301
+ {
302
+ "current": 1500.0,
303
+ "min": 1500.0,
304
+ "max": 3200.0
305
+ },
306
+ {
307
+ "current": 1500.0,
308
+ "min": 1500.0,
309
+ "max": 3200.0
310
+ },
311
+ {
312
+ "current": 1798.162,
313
+ "min": 1500.0,
314
+ "max": 3200.0
315
+ },
316
+ {
317
+ "current": 1500.0,
318
+ "min": 1500.0,
319
+ "max": 3200.0
320
+ },
321
+ {
322
+ "current": 3200.0,
323
+ "min": 1500.0,
324
+ "max": 3200.0
325
+ },
326
+ {
327
+ "current": 3126.511,
328
+ "min": 1500.0,
329
+ "max": 3200.0
330
+ },
331
+ {
332
+ "current": 3200.0,
333
+ "min": 1500.0,
334
+ "max": 3200.0
335
+ },
336
+ {
337
+ "current": 1500.0,
338
+ "min": 1500.0,
339
+ "max": 3200.0
340
+ },
341
+ {
342
+ "current": 3139.064,
343
+ "min": 1500.0,
344
+ "max": 3200.0
345
+ },
346
+ {
347
+ "current": 3137.864,
348
+ "min": 1500.0,
349
+ "max": 3200.0
350
+ }
351
+ ],
352
+ "disk": {
353
+ "/": {
354
+ "total": 50.0,
355
+ "used": 4.066852569580078
356
+ }
357
+ },
358
+ "gpu": "NVIDIA RTX A6000",
359
+ "gpu_count": 1,
360
+ "gpu_devices": [
361
+ {
362
+ "name": "NVIDIA RTX A6000",
363
+ "memory_total": 51527024640
364
+ }
365
+ ],
366
+ "memory": {
367
+ "total": 503.7315444946289
368
+ }
369
+ }
wandb/run-20240901_080830-ra4ttuu9/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"train/lr": 3.9999999999999996e-10, "train/loss": 0.108039490878582, "train/percent_done": 100.0, "train/peak_allocated_mem": 14.968535900115967, "train/allocated_mem": 14.327418327331543, "train/wps": 1193.533250331889, "train/avg_wps": 1337.6038764958669, "train/eta_in_seconds": 0.0, "_timestamp": 1725178147.2484581, "_runtime": 36.81593322753906, "_step": 10, "eval/train_loss": 0.108039490878582, "eval/perplexity": 1.1342318058013916, "eval/eval_loss": 0.18171557784080505, "_wandb": {"runtime": 59}}
wandb/run-20240901_080830-ra4ttuu9/logs/debug-internal.log ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-09-01 08:08:30,433 INFO StreamThr :1314 [internal.py:wandb_internal():85] W&B internal server running at pid: 1314, started at: 2024-09-01 08:08:30.432847
2
+ 2024-09-01 08:08:30,434 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: status
3
+ 2024-09-01 08:08:30,436 INFO WriterThread:1314 [datastore.py:open_for_write():87] open: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/run-ra4ttuu9.wandb
4
+ 2024-09-01 08:08:30,437 DEBUG SenderThread:1314 [sender.py:send():391] send: header
5
+ 2024-09-01 08:08:30,442 DEBUG SenderThread:1314 [sender.py:send():391] send: run
6
+ 2024-09-01 08:08:30,661 INFO SenderThread:1314 [dir_watcher.py:__init__():211] watching files in: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files
7
+ 2024-09-01 08:08:30,661 INFO SenderThread:1314 [sender.py:_start_run_threads():1200] run started: ra4ttuu9 with start time 1725178110.432525
8
+ 2024-09-01 08:08:30,668 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: run_start
9
+ 2024-09-01 08:08:30,682 DEBUG HandlerThread:1314 [system_info.py:__init__():26] System info init
10
+ 2024-09-01 08:08:30,682 DEBUG HandlerThread:1314 [system_info.py:__init__():41] System info init done
11
+ 2024-09-01 08:08:30,682 INFO HandlerThread:1314 [system_monitor.py:start():194] Starting system monitor
12
+ 2024-09-01 08:08:30,683 INFO SystemMonitor:1314 [system_monitor.py:_start():158] Starting system asset monitoring threads
13
+ 2024-09-01 08:08:30,683 INFO HandlerThread:1314 [system_monitor.py:probe():214] Collecting system info
14
+ 2024-09-01 08:08:30,684 INFO SystemMonitor:1314 [interfaces.py:start():188] Started cpu monitoring
15
+ 2024-09-01 08:08:30,684 INFO SystemMonitor:1314 [interfaces.py:start():188] Started disk monitoring
16
+ 2024-09-01 08:08:30,685 INFO SystemMonitor:1314 [interfaces.py:start():188] Started gpu monitoring
17
+ 2024-09-01 08:08:30,686 INFO SystemMonitor:1314 [interfaces.py:start():188] Started memory monitoring
18
+ 2024-09-01 08:08:30,686 INFO SystemMonitor:1314 [interfaces.py:start():188] Started network monitoring
19
+ 2024-09-01 08:08:30,709 DEBUG HandlerThread:1314 [system_info.py:probe():152] Probing system
20
+ 2024-09-01 08:08:30,711 DEBUG HandlerThread:1314 [system_info.py:_probe_git():137] Probing git
21
+ 2024-09-01 08:08:30,718 DEBUG HandlerThread:1314 [system_info.py:_probe_git():145] Probing git done
22
+ 2024-09-01 08:08:30,718 DEBUG HandlerThread:1314 [system_info.py:probe():200] Probing system done
23
+ 2024-09-01 08:08:30,718 DEBUG HandlerThread:1314 [system_monitor.py:probe():223] {'os': 'Linux-6.5.0-35-generic-x86_64-with-glibc2.35', 'python': '3.10.12', 'heartbeatAt': '2024-09-01T08:08:30.709894', 'startedAt': '2024-09-01T08:08:30.425931', 'docker': None, 'cuda': None, 'args': ('example.yaml',), 'state': 'running', 'program': '-m train', 'codePathLocal': None, 'git': {'remote': 'https://github.com/mistralai/mistral-finetune.git', 'commit': '0b0eaac415ba6317e198aa2207c7bd4b3973adbe'}, 'email': None, 'root': '/workspace/mistral-finetune', 'host': 'f6e183d76cb1', 'username': 'root', 'executable': '/usr/bin/python', 'cpu_count': 32, 'cpu_count_logical': 64, 'cpu_freq': {'current': 1976.101125, 'min': 1500.0, 'max': 3200.0}, 'cpu_freq_per_core': [{'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 3925.001, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 2300.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 3200.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 2773.255, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 3200.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 2221.478, 'min': 1500.0, 'max': 3200.0}, {'current': 2062.966, 'min': 1500.0, 'max': 3200.0}, {'current': 3200.0, 'min': 1500.0, 'max': 3200.0}, {'current': 2111.787, 'min': 1500.0, 'max': 3200.0}, {'current': 3925.001, 'min': 1500.0, 'max': 3200.0}, {'current': 3137.781, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 3200.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1499.733, 'min': 1500.0, 'max': 3200.0}, {'current': 2276.722, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1606.878, 'min': 1500.0, 'max': 3200.0}, {'current': 3120.917, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 3924.981, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1916.093, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 3924.985, 'min': 1500.0, 'max': 3200.0}, {'current': 3200.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1798.162, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 3200.0, 'min': 1500.0, 'max': 3200.0}, {'current': 3126.511, 'min': 1500.0, 'max': 3200.0}, {'current': 3200.0, 'min': 1500.0, 'max': 3200.0}, {'current': 1500.0, 'min': 1500.0, 'max': 3200.0}, {'current': 3139.064, 'min': 1500.0, 'max': 3200.0}, {'current': 3137.864, 'min': 1500.0, 'max': 3200.0}], 'disk': {'/': {'total': 50.0, 'used': 4.066852569580078}}, 'gpu': 'NVIDIA RTX A6000', 'gpu_count': 1, 'gpu_devices': [{'name': 'NVIDIA RTX A6000', 'memory_total': 51527024640}], 'memory': {'total': 503.7315444946289}}
24
+ 2024-09-01 08:08:30,718 INFO HandlerThread:1314 [system_monitor.py:probe():224] Finished collecting system info
25
+ 2024-09-01 08:08:30,718 INFO HandlerThread:1314 [system_monitor.py:probe():227] Publishing system info
26
+ 2024-09-01 08:08:30,719 INFO HandlerThread:1314 [system_monitor.py:probe():229] Finished publishing system info
27
+ 2024-09-01 08:08:30,724 DEBUG SenderThread:1314 [sender.py:send():391] send: files
28
+ 2024-09-01 08:08:30,724 INFO SenderThread:1314 [sender.py:_save_file():1466] saving file wandb-metadata.json with policy now
29
+ 2024-09-01 08:08:30,806 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: python_packages
30
+ 2024-09-01 08:08:30,806 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: python_packages
31
+ 2024-09-01 08:08:30,807 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: stop_status
32
+ 2024-09-01 08:08:30,808 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: stop_status
33
+ 2024-09-01 08:08:30,809 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: internal_messages
34
+ 2024-09-01 08:08:31,023 DEBUG SenderThread:1314 [sender.py:send():391] send: telemetry
35
+ 2024-09-01 08:08:31,143 INFO wandb-upload_0:1314 [upload_job.py:push():130] Uploaded file /tmp/tmp53jjcldxwandb/h8od4rgj-wandb-metadata.json
36
+ 2024-09-01 08:08:31,662 INFO Thread-12 :1314 [dir_watcher.py:_on_file_created():271] file/dir created: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-metadata.json
37
+ 2024-09-01 08:08:31,662 INFO Thread-12 :1314 [dir_watcher.py:_on_file_created():271] file/dir created: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
38
+ 2024-09-01 08:08:31,662 INFO Thread-12 :1314 [dir_watcher.py:_on_file_created():271] file/dir created: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/requirements.txt
39
+ 2024-09-01 08:08:33,662 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
40
+ 2024-09-01 08:08:35,486 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: status_report
41
+ 2024-09-01 08:08:37,663 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
42
+ 2024-09-01 08:08:39,664 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
43
+ 2024-09-01 08:08:40,093 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: partial_history
44
+ 2024-09-01 08:08:40,094 DEBUG SenderThread:1314 [sender.py:send():391] send: telemetry
45
+ 2024-09-01 08:08:40,800 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: status_report
46
+ 2024-09-01 08:08:40,813 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: internal_messages
47
+ 2024-09-01 08:08:41,665 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
48
+ 2024-09-01 08:08:42,978 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: partial_history
49
+ 2024-09-01 08:08:42,979 DEBUG SenderThread:1314 [sender.py:send():391] send: history
50
+ 2024-09-01 08:08:42,979 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: summary_record
51
+ 2024-09-01 08:08:42,980 INFO SenderThread:1314 [sender.py:_save_file():1466] saving file wandb-summary.json with policy end
52
+ 2024-09-01 08:08:43,666 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
53
+ 2024-09-01 08:08:43,666 INFO Thread-12 :1314 [dir_watcher.py:_on_file_created():271] file/dir created: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-summary.json
54
+ 2024-09-01 08:08:45,667 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
55
+ 2024-09-01 08:08:45,808 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: stop_status
56
+ 2024-09-01 08:08:45,808 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: stop_status
57
+ 2024-09-01 08:08:45,879 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: status_report
58
+ 2024-09-01 08:08:45,954 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: partial_history
59
+ 2024-09-01 08:08:45,955 DEBUG SenderThread:1314 [sender.py:send():391] send: history
60
+ 2024-09-01 08:08:45,955 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: summary_record
61
+ 2024-09-01 08:08:45,956 INFO SenderThread:1314 [sender.py:_save_file():1466] saving file wandb-summary.json with policy end
62
+ 2024-09-01 08:08:46,667 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-summary.json
63
+ 2024-09-01 08:08:47,667 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
64
+ 2024-09-01 08:08:48,830 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: partial_history
65
+ 2024-09-01 08:08:48,831 DEBUG SenderThread:1314 [sender.py:send():391] send: history
66
+ 2024-09-01 08:08:48,832 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: summary_record
67
+ 2024-09-01 08:08:48,832 INFO SenderThread:1314 [sender.py:_save_file():1466] saving file wandb-summary.json with policy end
68
+ 2024-09-01 08:08:49,668 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-summary.json
69
+ 2024-09-01 08:08:49,669 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
70
+ 2024-09-01 08:08:50,809 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: internal_messages
71
+ 2024-09-01 08:08:50,894 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: status_report
72
+ 2024-09-01 08:08:51,669 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
73
+ 2024-09-01 08:08:52,333 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: partial_history
74
+ 2024-09-01 08:08:52,334 DEBUG SenderThread:1314 [sender.py:send():391] send: history
75
+ 2024-09-01 08:08:52,334 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: summary_record
76
+ 2024-09-01 08:08:52,334 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: partial_history
77
+ 2024-09-01 08:08:52,335 INFO SenderThread:1314 [sender.py:_save_file():1466] saving file wandb-summary.json with policy end
78
+ 2024-09-01 08:08:52,670 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-summary.json
79
+ 2024-09-01 08:08:53,670 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
80
+ 2024-09-01 08:08:55,120 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: partial_history
81
+ 2024-09-01 08:08:55,121 DEBUG SenderThread:1314 [sender.py:send():391] send: history
82
+ 2024-09-01 08:08:55,122 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: summary_record
83
+ 2024-09-01 08:08:55,122 INFO SenderThread:1314 [sender.py:_save_file():1466] saving file wandb-summary.json with policy end
84
+ 2024-09-01 08:08:55,671 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-summary.json
85
+ 2024-09-01 08:08:55,672 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
86
+ 2024-09-01 08:08:56,810 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: status_report
87
+ 2024-09-01 08:08:57,672 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
88
+ 2024-09-01 08:08:58,009 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: partial_history
89
+ 2024-09-01 08:08:58,010 DEBUG SenderThread:1314 [sender.py:send():391] send: history
90
+ 2024-09-01 08:08:58,011 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: summary_record
91
+ 2024-09-01 08:08:58,012 INFO SenderThread:1314 [sender.py:_save_file():1466] saving file wandb-summary.json with policy end
92
+ 2024-09-01 08:08:58,673 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-summary.json
93
+ 2024-09-01 08:08:59,674 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
94
+ 2024-09-01 08:09:00,808 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: stop_status
95
+ 2024-09-01 08:09:00,809 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: internal_messages
96
+ 2024-09-01 08:09:00,810 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: stop_status
97
+ 2024-09-01 08:09:00,953 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: partial_history
98
+ 2024-09-01 08:09:00,954 DEBUG SenderThread:1314 [sender.py:send():391] send: history
99
+ 2024-09-01 08:09:00,955 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: summary_record
100
+ 2024-09-01 08:09:00,955 INFO SenderThread:1314 [sender.py:_save_file():1466] saving file wandb-summary.json with policy end
101
+ 2024-09-01 08:09:01,674 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-summary.json
102
+ 2024-09-01 08:09:01,675 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
103
+ 2024-09-01 08:09:02,613 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: status_report
104
+ 2024-09-01 08:09:03,675 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/config.yaml
105
+ 2024-09-01 08:09:03,676 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
106
+ 2024-09-01 08:09:03,815 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: partial_history
107
+ 2024-09-01 08:09:03,817 DEBUG SenderThread:1314 [sender.py:send():391] send: history
108
+ 2024-09-01 08:09:03,817 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: summary_record
109
+ 2024-09-01 08:09:03,817 INFO SenderThread:1314 [sender.py:_save_file():1466] saving file wandb-summary.json with policy end
110
+ 2024-09-01 08:09:04,676 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-summary.json
111
+ 2024-09-01 08:09:05,676 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
112
+ 2024-09-01 08:09:07,247 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: partial_history
113
+ 2024-09-01 08:09:07,248 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: partial_history
114
+ 2024-09-01 08:09:07,249 DEBUG SenderThread:1314 [sender.py:send():391] send: history
115
+ 2024-09-01 08:09:07,249 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: summary_record
116
+ 2024-09-01 08:09:07,250 INFO SenderThread:1314 [sender.py:_save_file():1466] saving file wandb-summary.json with policy end
117
+ 2024-09-01 08:09:07,678 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-summary.json
118
+ 2024-09-01 08:09:07,678 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
119
+ 2024-09-01 08:09:08,251 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: status_report
120
+ 2024-09-01 08:09:09,678 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
121
+ 2024-09-01 08:09:10,825 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: internal_messages
122
+ 2024-09-01 08:09:13,826 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: status_report
123
+ 2024-09-01 08:09:15,810 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: stop_status
124
+ 2024-09-01 08:09:15,811 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: stop_status
125
+ 2024-09-01 08:09:18,980 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: status_report
126
+ 2024-09-01 08:09:20,837 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: internal_messages
127
+ 2024-09-01 08:09:24,838 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: status_report
128
+ 2024-09-01 08:09:29,702 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: check_version
129
+ 2024-09-01 08:09:29,703 DEBUG SenderThread:1314 [sender.py:send():391] send: telemetry
130
+ 2024-09-01 08:09:29,703 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: check_version
131
+ 2024-09-01 08:09:29,803 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: server_info
132
+ 2024-09-01 08:09:29,803 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: server_info
133
+ 2024-09-01 08:09:29,870 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: status_report
134
+ 2024-09-01 08:09:29,871 DEBUG SenderThread:1314 [sender.py:send():391] send: exit
135
+ 2024-09-01 08:09:29,872 INFO SenderThread:1314 [sender.py:send_exit():598] handling exit code: 0
136
+ 2024-09-01 08:09:29,872 INFO SenderThread:1314 [sender.py:send_exit():600] handling runtime: 59
137
+ 2024-09-01 08:09:29,874 INFO SenderThread:1314 [sender.py:_save_file():1466] saving file wandb-summary.json with policy end
138
+ 2024-09-01 08:09:29,875 INFO SenderThread:1314 [sender.py:send_exit():606] send defer
139
+ 2024-09-01 08:09:29,875 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
140
+ 2024-09-01 08:09:29,875 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 0
141
+ 2024-09-01 08:09:29,875 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
142
+ 2024-09-01 08:09:29,875 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 0
143
+ 2024-09-01 08:09:29,876 INFO SenderThread:1314 [sender.py:transition_state():626] send defer: 1
144
+ 2024-09-01 08:09:29,876 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
145
+ 2024-09-01 08:09:29,876 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 1
146
+ 2024-09-01 08:09:29,876 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
147
+ 2024-09-01 08:09:29,876 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 1
148
+ 2024-09-01 08:09:29,876 INFO SenderThread:1314 [sender.py:transition_state():626] send defer: 2
149
+ 2024-09-01 08:09:29,876 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
150
+ 2024-09-01 08:09:29,877 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 2
151
+ 2024-09-01 08:09:29,877 INFO HandlerThread:1314 [system_monitor.py:finish():203] Stopping system monitor
152
+ 2024-09-01 08:09:29,877 INFO HandlerThread:1314 [interfaces.py:finish():200] Joined cpu monitor
153
+ 2024-09-01 08:09:29,878 DEBUG SystemMonitor:1314 [system_monitor.py:_start():172] Starting system metrics aggregation loop
154
+ 2024-09-01 08:09:29,878 INFO HandlerThread:1314 [interfaces.py:finish():200] Joined disk monitor
155
+ 2024-09-01 08:09:29,878 DEBUG SystemMonitor:1314 [system_monitor.py:_start():179] Finished system metrics aggregation loop
156
+ 2024-09-01 08:09:29,879 DEBUG SystemMonitor:1314 [system_monitor.py:_start():183] Publishing last batch of metrics
157
+ 2024-09-01 08:09:29,895 INFO HandlerThread:1314 [interfaces.py:finish():200] Joined gpu monitor
158
+ 2024-09-01 08:09:29,895 INFO HandlerThread:1314 [interfaces.py:finish():200] Joined memory monitor
159
+ 2024-09-01 08:09:29,896 INFO HandlerThread:1314 [interfaces.py:finish():200] Joined network monitor
160
+ 2024-09-01 08:09:29,896 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
161
+ 2024-09-01 08:09:29,896 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 2
162
+ 2024-09-01 08:09:29,896 INFO SenderThread:1314 [sender.py:transition_state():626] send defer: 3
163
+ 2024-09-01 08:09:29,896 DEBUG SenderThread:1314 [sender.py:send():391] send: stats
164
+ 2024-09-01 08:09:29,897 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
165
+ 2024-09-01 08:09:29,897 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 3
166
+ 2024-09-01 08:09:29,899 DEBUG SenderThread:1314 [sender.py:send():391] send: history
167
+ 2024-09-01 08:09:29,899 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: summary_record
168
+ 2024-09-01 08:09:29,899 INFO SenderThread:1314 [sender.py:_save_file():1466] saving file wandb-summary.json with policy end
169
+ 2024-09-01 08:09:29,900 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
170
+ 2024-09-01 08:09:29,900 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 3
171
+ 2024-09-01 08:09:29,900 INFO SenderThread:1314 [sender.py:transition_state():626] send defer: 4
172
+ 2024-09-01 08:09:29,900 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
173
+ 2024-09-01 08:09:29,900 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 4
174
+ 2024-09-01 08:09:29,901 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
175
+ 2024-09-01 08:09:29,901 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 4
176
+ 2024-09-01 08:09:29,901 INFO SenderThread:1314 [sender.py:transition_state():626] send defer: 5
177
+ 2024-09-01 08:09:29,901 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
178
+ 2024-09-01 08:09:29,901 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 5
179
+ 2024-09-01 08:09:29,901 DEBUG SenderThread:1314 [sender.py:send():391] send: summary
180
+ 2024-09-01 08:09:29,902 INFO SenderThread:1314 [sender.py:_save_file():1466] saving file wandb-summary.json with policy end
181
+ 2024-09-01 08:09:29,902 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
182
+ 2024-09-01 08:09:29,902 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 5
183
+ 2024-09-01 08:09:29,902 INFO SenderThread:1314 [sender.py:transition_state():626] send defer: 6
184
+ 2024-09-01 08:09:29,902 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
185
+ 2024-09-01 08:09:29,903 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 6
186
+ 2024-09-01 08:09:29,903 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
187
+ 2024-09-01 08:09:29,903 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 6
188
+ 2024-09-01 08:09:29,908 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: status_report
189
+ 2024-09-01 08:09:30,006 INFO SenderThread:1314 [sender.py:transition_state():626] send defer: 7
190
+ 2024-09-01 08:09:30,006 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
191
+ 2024-09-01 08:09:30,006 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 7
192
+ 2024-09-01 08:09:30,006 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
193
+ 2024-09-01 08:09:30,007 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 7
194
+ 2024-09-01 08:09:30,687 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/config.yaml
195
+ 2024-09-01 08:09:30,687 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-summary.json
196
+ 2024-09-01 08:09:30,872 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: poll_exit
197
+ 2024-09-01 08:09:31,156 INFO SenderThread:1314 [sender.py:transition_state():626] send defer: 8
198
+ 2024-09-01 08:09:31,157 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: poll_exit
199
+ 2024-09-01 08:09:31,157 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
200
+ 2024-09-01 08:09:31,157 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 8
201
+ 2024-09-01 08:09:31,158 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
202
+ 2024-09-01 08:09:31,158 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 8
203
+ 2024-09-01 08:09:31,158 INFO SenderThread:1314 [job_builder.py:build():443] Attempting to build job artifact
204
+ 2024-09-01 08:09:31,159 INFO SenderThread:1314 [job_builder.py:_get_source_type():572] is repo sourced job
205
+ 2024-09-01 08:09:31,159 WARNING SenderThread:1314 [job_builder.py:_log_if_verbose():274] No program path found, not creating job artifact. See https://docs.wandb.ai/guides/launch/create-job
206
+ 2024-09-01 08:09:31,159 INFO SenderThread:1314 [sender.py:transition_state():626] send defer: 9
207
+ 2024-09-01 08:09:31,159 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
208
+ 2024-09-01 08:09:31,159 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 9
209
+ 2024-09-01 08:09:31,159 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
210
+ 2024-09-01 08:09:31,160 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 9
211
+ 2024-09-01 08:09:31,160 INFO SenderThread:1314 [dir_watcher.py:finish():358] shutting down directory watcher
212
+ 2024-09-01 08:09:31,687 INFO Thread-12 :1314 [dir_watcher.py:_on_file_modified():288] file/dir modified: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
213
+ 2024-09-01 08:09:31,688 INFO SenderThread:1314 [dir_watcher.py:finish():388] scan: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files
214
+ 2024-09-01 08:09:31,688 INFO SenderThread:1314 [dir_watcher.py:finish():402] scan save: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/config.yaml config.yaml
215
+ 2024-09-01 08:09:31,688 INFO SenderThread:1314 [dir_watcher.py:finish():402] scan save: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-metadata.json wandb-metadata.json
216
+ 2024-09-01 08:09:31,688 INFO SenderThread:1314 [dir_watcher.py:finish():402] scan save: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/requirements.txt requirements.txt
217
+ 2024-09-01 08:09:31,691 INFO SenderThread:1314 [dir_watcher.py:finish():402] scan save: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log output.log
218
+ 2024-09-01 08:09:31,694 INFO SenderThread:1314 [dir_watcher.py:finish():402] scan save: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-summary.json wandb-summary.json
219
+ 2024-09-01 08:09:31,696 INFO SenderThread:1314 [sender.py:transition_state():626] send defer: 10
220
+ 2024-09-01 08:09:31,696 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
221
+ 2024-09-01 08:09:31,698 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 10
222
+ 2024-09-01 08:09:31,698 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
223
+ 2024-09-01 08:09:31,698 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 10
224
+ 2024-09-01 08:09:31,698 INFO SenderThread:1314 [file_pusher.py:finish():169] shutting down file pusher
225
+ 2024-09-01 08:09:31,872 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: poll_exit
226
+ 2024-09-01 08:09:31,873 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: poll_exit
227
+ 2024-09-01 08:09:31,928 INFO wandb-upload_0:1314 [upload_job.py:push():130] Uploaded file content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/config.yaml
228
+ 2024-09-01 08:09:32,038 INFO wandb-upload_1:1314 [upload_job.py:push():130] Uploaded file content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/requirements.txt
229
+ 2024-09-01 08:09:32,135 INFO wandb-upload_3:1314 [upload_job.py:push():130] Uploaded file content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/wandb-summary.json
230
+ 2024-09-01 08:09:32,170 INFO wandb-upload_2:1314 [upload_job.py:push():130] Uploaded file content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/files/output.log
231
+ 2024-09-01 08:09:32,370 INFO Thread-11 (_thread_body):1314 [sender.py:transition_state():626] send defer: 11
232
+ 2024-09-01 08:09:32,370 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
233
+ 2024-09-01 08:09:32,370 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 11
234
+ 2024-09-01 08:09:32,371 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
235
+ 2024-09-01 08:09:32,371 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 11
236
+ 2024-09-01 08:09:32,371 INFO SenderThread:1314 [file_pusher.py:join():175] waiting for file pusher
237
+ 2024-09-01 08:09:32,372 INFO SenderThread:1314 [sender.py:transition_state():626] send defer: 12
238
+ 2024-09-01 08:09:32,372 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
239
+ 2024-09-01 08:09:32,372 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 12
240
+ 2024-09-01 08:09:32,372 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
241
+ 2024-09-01 08:09:32,373 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 12
242
+ 2024-09-01 08:09:32,373 INFO SenderThread:1314 [file_stream.py:finish():601] file stream finish called
243
+ 2024-09-01 08:09:32,612 INFO SenderThread:1314 [file_stream.py:finish():605] file stream finish is done
244
+ 2024-09-01 08:09:32,612 INFO SenderThread:1314 [sender.py:transition_state():626] send defer: 13
245
+ 2024-09-01 08:09:32,612 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
246
+ 2024-09-01 08:09:32,612 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 13
247
+ 2024-09-01 08:09:32,613 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
248
+ 2024-09-01 08:09:32,613 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 13
249
+ 2024-09-01 08:09:32,613 INFO SenderThread:1314 [sender.py:transition_state():626] send defer: 14
250
+ 2024-09-01 08:09:32,613 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: defer
251
+ 2024-09-01 08:09:32,613 INFO HandlerThread:1314 [handler.py:handle_request_defer():184] handle defer: 14
252
+ 2024-09-01 08:09:32,613 DEBUG SenderThread:1314 [sender.py:send():391] send: final
253
+ 2024-09-01 08:09:32,614 DEBUG SenderThread:1314 [sender.py:send():391] send: footer
254
+ 2024-09-01 08:09:32,614 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: defer
255
+ 2024-09-01 08:09:32,614 INFO SenderThread:1314 [sender.py:send_request_defer():622] handle sender defer: 14
256
+ 2024-09-01 08:09:32,615 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: poll_exit
257
+ 2024-09-01 08:09:32,615 DEBUG SenderThread:1314 [sender.py:send_request():418] send_request: poll_exit
258
+ 2024-09-01 08:09:32,616 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: internal_messages
259
+ 2024-09-01 08:09:32,616 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: get_summary
260
+ 2024-09-01 08:09:32,617 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: sampled_history
261
+ 2024-09-01 08:09:32,618 DEBUG HandlerThread:1314 [handler.py:handle_request():158] handle_request: shutdown
262
+ 2024-09-01 08:09:32,618 INFO HandlerThread:1314 [handler.py:finish():884] shutting down handler
263
+ 2024-09-01 08:09:33,615 INFO WriterThread:1314 [datastore.py:close():296] close: content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/run-ra4ttuu9.wandb
264
+ 2024-09-01 08:09:33,615 INFO SenderThread:1314 [sender.py:finish():1631] shutting down sender
265
+ 2024-09-01 08:09:33,616 INFO SenderThread:1314 [file_pusher.py:finish():169] shutting down file pusher
266
+ 2024-09-01 08:09:33,616 INFO SenderThread:1314 [file_pusher.py:join():175] waiting for file pusher
wandb/run-20240901_080830-ra4ttuu9/logs/debug.log ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-09-01 08:08:30,428 INFO MainThread:1234 [wandb_setup.py:_flush():77] Current SDK version is 0.17.8
2
+ 2024-09-01 08:08:30,428 INFO MainThread:1234 [wandb_setup.py:_flush():77] Configure stats pid to 1234
3
+ 2024-09-01 08:08:30,428 INFO MainThread:1234 [wandb_setup.py:_flush():77] Loading settings from /root/.config/wandb/settings
4
+ 2024-09-01 08:08:30,428 INFO MainThread:1234 [wandb_setup.py:_flush():77] Loading settings from /workspace/mistral-finetune/wandb/settings
5
+ 2024-09-01 08:08:30,428 INFO MainThread:1234 [wandb_setup.py:_flush():77] Loading settings from environment variables: {}
6
+ 2024-09-01 08:08:30,428 WARNING MainThread:1234 [wandb_setup.py:_flush():77] Could not find program at -m train
7
+ 2024-09-01 08:08:30,428 INFO MainThread:1234 [wandb_setup.py:_flush():77] Inferring run settings from compute environment: {'program_relpath': None, 'program': '-m train'}
8
+ 2024-09-01 08:08:30,428 INFO MainThread:1234 [wandb_setup.py:_flush():77] Applying login settings: {'api_key': '***REDACTED***'}
9
+ 2024-09-01 08:08:30,428 INFO MainThread:1234 [wandb_setup.py:_flush():77] Applying login settings: {'api_key': '***REDACTED***'}
10
+ 2024-09-01 08:08:30,428 INFO MainThread:1234 [wandb_setup.py:_flush():77] Applying login settings: {}
11
+ 2024-09-01 08:08:30,428 INFO MainThread:1234 [wandb_init.py:_log_setup():524] Logging user logs to content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/logs/debug.log
12
+ 2024-09-01 08:08:30,428 INFO MainThread:1234 [wandb_init.py:_log_setup():525] Logging internal logs to content/test_ultra_1/wandb/run-20240901_080830-ra4ttuu9/logs/debug-internal.log
13
+ 2024-09-01 08:08:30,428 INFO MainThread:1234 [wandb_init.py:init():607] calling init triggers
14
+ 2024-09-01 08:08:30,429 INFO MainThread:1234 [wandb_init.py:init():614] wandb.init called with sweep_config: {}
15
+ config: {'data': {'data': '', 'shuffle': False, 'instruct_data': './data/t_train.jsonl', 'eval_instruct_data': './data/t_eval.jsonl', 'instruct': {'shuffle': True, 'dynamic_chunk_fn_call': True}}, 'model_id_or_path': '../downloaded_model/mistral_models/7B-v0.3/', 'run_dir': './content/test_ultra_1', 'optim': {'lr': 0.0001, 'weight_decay': 0.1, 'pct_start': 0.05}, 'seed': 0, 'num_microbatches': 4, 'seq_len': 1024, 'batch_size': 1, 'max_norm': 1.0, 'max_steps': 10, 'log_freq': 1, 'ckpt_freq': 10, 'save_adapters': False, 'no_ckpt': False, 'num_ckpt_keep': 3, 'eval_freq': 5, 'no_eval': False, 'checkpoint': True, 'world_size': 1, 'wandb': {'project': 'finetuning_mistral', 'offline': False, 'key': '9e930bac040e715c4be9de064318956067b45479', 'run_name': None}, 'mlflow': {'tracking_uri': None, 'experiment_name': None}, 'lora': {'enable': True, 'rank': 16, 'dropout': 0.05, 'scaling': 2.0}}
16
+ 2024-09-01 08:08:30,429 INFO MainThread:1234 [wandb_init.py:init():657] starting backend
17
+ 2024-09-01 08:08:30,429 INFO MainThread:1234 [wandb_init.py:init():661] setting up manager
18
+ 2024-09-01 08:08:30,431 INFO MainThread:1234 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
19
+ 2024-09-01 08:08:30,432 INFO MainThread:1234 [wandb_init.py:init():669] backend started and connected
20
+ 2024-09-01 08:08:30,436 INFO MainThread:1234 [wandb_init.py:init():767] updated telemetry
21
+ 2024-09-01 08:08:30,441 INFO MainThread:1234 [wandb_init.py:init():800] communicating run to backend with 90.0 second timeout
22
+ 2024-09-01 08:08:30,665 INFO MainThread:1234 [wandb_init.py:init():851] starting run threads in backend
23
+ 2024-09-01 08:08:30,808 INFO MainThread:1234 [wandb_run.py:_console_start():2463] atexit reg
24
+ 2024-09-01 08:08:30,808 INFO MainThread:1234 [wandb_run.py:_redirect():2309] redirect: wrap_raw
25
+ 2024-09-01 08:08:30,808 INFO MainThread:1234 [wandb_run.py:_redirect():2374] Wrapping output streams.
26
+ 2024-09-01 08:08:30,808 INFO MainThread:1234 [wandb_run.py:_redirect():2399] Redirects installed.
27
+ 2024-09-01 08:08:30,810 INFO MainThread:1234 [wandb_init.py:init():894] run started, returning control to user process
28
+ 2024-09-01 08:09:29,670 INFO MainThread:1234 [wandb_run.py:_finish():2160] finishing run dwivedi-rishabh95-avizva/finetuning_mistral/ra4ttuu9
29
+ 2024-09-01 08:09:29,671 INFO MainThread:1234 [wandb_run.py:_atexit_cleanup():2424] got exitcode: 0
30
+ 2024-09-01 08:09:29,671 INFO MainThread:1234 [wandb_run.py:_restore():2406] restore
31
+ 2024-09-01 08:09:29,671 INFO MainThread:1234 [wandb_run.py:_restore():2412] restore done
32
+ 2024-09-01 08:09:29,671 INFO MainThread:1234 [wandb_run.py:_on_finish():2677] communicating current version
33
+ 2024-09-01 08:09:29,802 INFO MainThread:1234 [wandb_run.py:_on_finish():2686] got version response
34
+ 2024-09-01 08:09:33,620 INFO MainThread:1234 [wandb_run.py:_footer_history_summary_info():4078] rendering history
35
+ 2024-09-01 08:09:33,620 INFO MainThread:1234 [wandb_run.py:_footer_history_summary_info():4110] rendering summary
36
+ 2024-09-01 08:09:33,629 INFO MainThread:1234 [wandb_run.py:_footer_sync_info():4037] logging synced files
wandb/run-20240901_080830-ra4ttuu9/run-ra4ttuu9.wandb ADDED
Binary file (19.9 kB). View file