Stewart Whiting committed on
Commit
04314e4
1 Parent(s): b84e030

Upload 7 files

Browse files
governmentgpt/args.yaml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ batch_size: 1
2
+ checkpoint: true
3
+ ckpt_freq: 100
4
+ data:
5
+ data: /content/data/HansardSequences_250k.big.txt
6
+ eval_instruct_data: ''
7
+ instruct:
8
+ dynamic_chunk_fn_call: true
9
+ shuffle: true
10
+ instruct_data: ''
11
+ shuffle: false
12
+ eval_freq: 100
13
+ log_freq: 1
14
+ lora:
15
+ dropout: 0.0
16
+ enable: true
17
+ rank: 64
18
+ scaling: 2.0
19
+ max_norm: 1.0
20
+ max_steps: 100
21
+ mlflow:
22
+ experiment_name: null
23
+ tracking_uri: null
24
+ model_id_or_path: /content/mistral_models/7B-v0.3
25
+ no_ckpt: false
26
+ no_eval: true
27
+ num_ckpt_keep: 3
28
+ num_microbatches: 8
29
+ optim:
30
+ lr: 0.0001
31
+ pct_start: 0.05
32
+ weight_decay: 0.1
33
+ run_dir: /content/debategpt
34
+ save_adapters: true
35
+ seed: 0
36
+ seq_len: 8192
37
+ wandb:
38
+ key: null
39
+ offline: false
40
+ project: null
41
+ run_name: null
42
+ world_size: 1
governmentgpt/checkpoints/checkpoint_000100/consolidated/lora.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ada00474e0a8f9c783d4d8bb0371f0b9f84d2efcb6ed282608222f45f751dc71
3
+ size 335594288
governmentgpt/checkpoints/checkpoint_000100/consolidated/params.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dim": 4096,
3
+ "n_layers": 32,
4
+ "head_dim": 128,
5
+ "hidden_dim": 14336,
6
+ "n_heads": 32,
7
+ "n_kv_heads": 8,
8
+ "norm_eps": 1e-05,
9
+ "vocab_size": 32768,
10
+ "rope_theta": 1000000.0,
11
+ "lora": {
12
+ "enable": true,
13
+ "rank": 64,
14
+ "dropout": 0.0,
15
+ "scaling": 2.0
16
+ },
17
+ "moe": null
18
+ }
governmentgpt/checkpoints/checkpoint_000100/consolidated/tokenizer.model.v3 ADDED
Binary file (588 kB). View file
 
governmentgpt/metrics.train.jsonl ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"lr": 4.000000000000002e-06, "step": 1, "loss": 1.3765387535095215, "percent_done": 1.0, "peak_allocated_mem": 20.97185754776001, "allocated_mem": 17.088991165161133, "wps": 3181.200367002771, "avg_wps": 3181.200367002771, "eta_in_seconds": 2039.5018393993378, "at": "2024-06-07T10:04:04.491457"}
2
+ {"lr": 1.8058874503045725e-05, "step": 2, "loss": 1.4619245529174805, "percent_done": 2.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3444.076975004867, "avg_wps": 3307.4234847204816, "eta_in_seconds": 1941.8523299694061, "at": "2024-06-07T10:04:23.521608"}
3
+ {"lr": 5.2000000000000004e-05, "step": 3, "loss": 1.4587992429733276, "percent_done": 3.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3372.7162252529756, "avg_wps": 3328.90501641758, "eta_in_seconds": 1909.6345400810242, "at": "2024-06-07T10:04:42.954304"}
4
+ {"lr": 8.594112549695428e-05, "step": 4, "loss": 1.390647053718567, "percent_done": 4.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3437.7511971408117, "avg_wps": 3355.4652263845883, "eta_in_seconds": 1874.9876918792725, "at": "2024-06-07T10:05:02.019421"}
5
+ {"lr": 0.0001, "step": 5, "loss": 1.3330976963043213, "percent_done": 5.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3381.186610497474, "avg_wps": 3360.578148642961, "eta_in_seconds": 1852.6336019039152, "at": "2024-06-07T10:05:21.403426"}
6
+ {"lr": 9.997266297639484e-05, "step": 6, "loss": 1.3474574089050293, "percent_done": 6.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3347.039564846475, "avg_wps": 3358.314117190209, "eta_in_seconds": 1834.3680147329967, "at": "2024-06-07T10:05:40.985333"}
7
+ {"lr": 9.989068179821329e-05, "step": 7, "loss": 1.3682441711425781, "percent_done": 7.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3372.721232578016, "avg_wps": 3360.3647361398084, "eta_in_seconds": 1813.7459706238337, "at": "2024-06-07T10:06:00.418028"}
8
+ {"lr": 9.975414611067007e-05, "step": 8, "loss": 1.3870329856872559, "percent_done": 8.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3414.406015640926, "avg_wps": 3367.026158393041, "eta_in_seconds": 1790.6935427188873, "at": "2024-06-07T10:06:19.613505"}
9
+ {"lr": 9.95632052135349e-05, "step": 9, "loss": 1.3421063423156738, "percent_done": 9.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3390.838261544151, "avg_wps": 3369.655419171514, "eta_in_seconds": 1769.8474348651039, "at": "2024-06-07T10:06:38.942509"}
10
+ {"lr": 9.931806789787545e-05, "step": 10, "loss": 1.3948217630386353, "percent_done": 10.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3394.379408535998, "avg_wps": 3372.111598674955, "eta_in_seconds": 1749.1236062049866, "at": "2024-06-07T10:06:58.251104"}
11
+ {"lr": 9.90190022177473e-05, "step": 11, "loss": 1.315626621246338, "percent_done": 11.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3364.375716091919, "avg_wps": 3371.406866886714, "eta_in_seconds": 1730.0504597317088, "at": "2024-06-07T10:07:17.731986"}
12
+ {"lr": 9.866633519708085e-05, "step": 12, "loss": 1.3564515113830566, "percent_done": 12.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3405.324975555631, "avg_wps": 3374.207547661055, "eta_in_seconds": 1709.191837946574, "at": "2024-06-07T10:07:36.978757"}
13
+ {"lr": 9.826045247208538e-05, "step": 13, "loss": 1.3700439929962158, "percent_done": 13.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3347.9499550749147, "avg_wps": 3372.1731190364717, "eta_in_seconds": 1690.788639472081, "at": "2024-06-07T10:07:56.555203"}
14
+ {"lr": 9.780179786956158e-05, "step": 14, "loss": 1.329624056816101, "percent_done": 14.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3436.172791204611, "avg_wps": 3376.6653568710103, "eta_in_seconds": 1669.1307560375758, "at": "2024-06-07T10:08:15.629215"}
15
+ {"lr": 9.729087292158339e-05, "step": 15, "loss": 1.2524995803833008, "percent_done": 15.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3388.1745293445474, "avg_wps": 3377.430201900378, "eta_in_seconds": 1649.3486665884654, "at": "2024-06-07T10:08:34.973266"}
16
+ {"lr": 9.672823631708012e-05, "step": 16, "loss": 1.3875758647918701, "percent_done": 16.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3363.1833661713595, "avg_wps": 3376.536239397106, "eta_in_seconds": 1630.3761042952538, "at": "2024-06-07T10:08:54.460976"}
17
+ {"lr": 9.611450329091824e-05, "step": 17, "loss": 1.3539988994598389, "percent_done": 17.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3396.120448242881, "avg_wps": 3377.681997097675, "eta_in_seconds": 1610.4204021201413, "at": "2024-06-07T10:09:13.759985"}
18
+ {"lr": 9.545034495115111e-05, "step": 18, "loss": 1.32791006565094, "percent_done": 18.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3389.209273059628, "avg_wps": 3378.320343812424, "eta_in_seconds": 1590.7171177069347, "at": "2024-06-07T10:09:33.098120"}
19
+ {"lr": 9.473648754517221e-05, "step": 19, "loss": 1.3495287895202637, "percent_done": 19.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3420.2401336432176, "avg_wps": 3380.501013905266, "eta_in_seconds": 1570.3045134920824, "at": "2024-06-07T10:09:52.260776"}
20
+ {"lr": 9.397371166557421e-05, "step": 20, "loss": 1.3365004062652588, "percent_done": 20.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3359.5175172037866, "avg_wps": 3379.4456155398257, "eta_in_seconds": 1551.4023885726929, "at": "2024-06-07T10:10:11.769732"}
21
+ {"lr": 9.316285139658251e-05, "step": 21, "loss": 1.3371220827102661, "percent_done": 21.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3368.3734448787122, "avg_wps": 3378.9167189956765, "eta_in_seconds": 1532.2496618202754, "at": "2024-06-07T10:10:31.227427"}
22
+ {"lr": 9.230479340199629e-05, "step": 22, "loss": 1.382455587387085, "percent_done": 22.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3404.665469426161, "avg_wps": 3380.0786647107093, "eta_in_seconds": 1512.3340333375063, "at": "2024-06-07T10:10:50.477760"}
23
+ {"lr": 9.140047595563478e-05, "step": 23, "loss": 1.352661371231079, "percent_done": 23.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3385.930461761261, "avg_wps": 3380.3326700395487, "eta_in_seconds": 1492.8329524268274, "at": "2024-06-07T10:11:09.834564"}
24
+ {"lr": 9.045088791534851e-05, "step": 24, "loss": 1.3765251636505127, "percent_done": 24.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3363.709911291755, "avg_wps": 3379.6367756221507, "eta_in_seconds": 1473.7489057779312, "at": "2024-06-07T10:11:29.319467"}
25
+ {"lr": 8.945706764171781e-05, "step": 25, "loss": 1.3368202447891235, "percent_done": 25.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3424.2913532048387, "avg_wps": 3381.40058599026, "eta_in_seconds": 1453.5988490581512, "at": "2024-06-07T10:11:48.459468"}
26
+ {"lr": 8.842010186262079e-05, "step": 26, "loss": 1.350569486618042, "percent_done": 26.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3367.8799766064844, "avg_wps": 3380.8785554839633, "eta_in_seconds": 1434.4389839539162, "at": "2024-06-07T10:12:07.920096"}
27
+ {"lr": 8.73411244849123e-05, "step": 27, "loss": 1.345234751701355, "percent_done": 27.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3393.679679583234, "avg_wps": 3381.3509487998786, "eta_in_seconds": 1414.8569824430679, "at": "2024-06-07T10:12:27.232742"}
28
+ {"lr": 8.62213153545135e-05, "step": 28, "loss": 1.4049516916275024, "percent_done": 28.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3385.5065143784745, "avg_wps": 3381.4991861839244, "eta_in_seconds": 1395.4142054149083, "at": "2024-06-07T10:12:46.592035"}
29
+ {"lr": 8.506189896626757e-05, "step": 29, "loss": 1.4302780628204346, "percent_done": 29.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3421.655278725807, "avg_wps": 3382.8681824709765, "eta_in_seconds": 1375.4765923516504, "at": "2024-06-07T10:13:05.746787"}
30
+ {"lr": 8.386414312497274e-05, "step": 30, "loss": 1.3352062702178955, "percent_done": 30.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3402.904706615275, "avg_wps": 3383.53226440248, "eta_in_seconds": 1355.8375217119851, "at": "2024-06-07T10:13:25.007084"}
31
+ {"lr": 8.262935755905613e-05, "step": 31, "loss": 1.4035916328430176, "percent_done": 31.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3350.540049466917, "avg_wps": 3382.457861036068, "eta_in_seconds": 1336.8929298693135, "at": "2024-06-07T10:13:44.568500"}
32
+ {"lr": 8.135889248840516e-05, "step": 32, "loss": 1.2784558534622192, "percent_done": 32.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3383.8732706174524, "avg_wps": 3382.502074662202, "eta_in_seconds": 1317.5004483759403, "at": "2024-06-07T10:14:03.937078"}
33
+ {"lr": 8.00541371479218e-05, "step": 33, "loss": 1.3766213655471802, "percent_done": 33.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3355.5401551214254, "avg_wps": 3381.678682425525, "eta_in_seconds": 1298.4415174686546, "at": "2024-06-07T10:14:23.469172"}
34
+ {"lr": 7.871651826841464e-05, "step": 34, "loss": 1.3945895433425903, "percent_done": 34.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3347.8620416029103, "avg_wps": 3380.674327231238, "eta_in_seconds": 1279.4417862611658, "at": "2024-06-07T10:14:43.046116"}
35
+ {"lr": 7.734749851648974e-05, "step": 35, "loss": 1.3306955099105835, "percent_done": 35.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3344.451988412787, "avg_wps": 3379.628518063561, "eta_in_seconds": 1260.446222782135, "at": "2024-06-07T10:15:02.643040"}
36
+ {"lr": 7.594857489514601e-05, "step": 36, "loss": 1.3251088857650757, "percent_done": 36.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3357.3130003946535, "avg_wps": 3379.0046375502216, "eta_in_seconds": 1241.2838838365342, "at": "2024-06-07T10:15:22.164984"}
37
+ {"lr": 7.452127710682466e-05, "step": 37, "loss": 1.3140506744384766, "percent_done": 37.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3413.9977201008533, "avg_wps": 3379.9409620018787, "eta_in_seconds": 1221.5503307355416, "at": "2024-06-07T10:15:41.362742"}
38
+ {"lr": 7.30671658807018e-05, "step": 38, "loss": 1.3560665845870972, "percent_done": 38.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3381.517748176451, "avg_wps": 3379.982437535202, "eta_in_seconds": 1202.1458913150586, "at": "2024-06-07T10:16:00.745000"}
39
+ {"lr": 7.158783126605418e-05, "step": 39, "loss": 1.4378803968429565, "percent_done": 39.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3279.9334231447865, "avg_wps": 3377.34089214523, "eta_in_seconds": 1183.6815197712338, "at": "2024-06-07T10:16:20.727469"}
40
+ {"lr": 7.008489089356355e-05, "step": 40, "loss": 1.3525066375732422, "percent_done": 40.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3406.769306916459, "avg_wps": 3378.0704048304447, "eta_in_seconds": 1164.0254727602005, "at": "2024-06-07T10:16:39.965909"}
41
+ {"lr": 6.855998820646126e-05, "step": 41, "loss": 1.2807648181915283, "percent_done": 41.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3391.977179203981, "avg_wps": 3378.4082375875482, "eta_in_seconds": 1144.510588442407, "at": "2024-06-07T10:16:59.288209"}
42
+ {"lr": 6.701479066344726e-05, "step": 42, "loss": 1.2767056226730347, "percent_done": 42.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3385.6098016317833, "avg_wps": 3378.5793473374406, "eta_in_seconds": 1125.055122057597, "at": "2024-06-07T10:17:18.646837"}
43
+ {"lr": 6.54509879153485e-05, "step": 43, "loss": 1.3247581720352173, "percent_done": 43.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3360.7356612481767, "avg_wps": 3378.1622261348807, "eta_in_seconds": 1105.794141885846, "at": "2024-06-07T10:17:38.148781"}
44
+ {"lr": 6.387028995751049e-05, "step": 44, "loss": 1.3494842052459717, "percent_done": 44.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3371.770728099371, "avg_wps": 3378.0166957295364, "eta_in_seconds": 1086.4410482753408, "at": "2024-06-07T10:17:57.586982"}
45
+ {"lr": 6.227442525994253e-05, "step": 45, "loss": 1.37782883644104, "percent_done": 45.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3402.4364881711144, "avg_wps": 3378.5555489604476, "eta_in_seconds": 1066.8701306713951, "at": "2024-06-07T10:18:16.849965"}
46
+ {"lr": 6.06651388772612e-05, "step": 46, "loss": 1.3752127885818481, "percent_done": 46.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3379.714766820037, "avg_wps": 3378.5807408929877, "eta_in_seconds": 1047.464681594268, "at": "2024-06-07T10:18:36.242361"}
47
+ {"lr": 5.904419054049864e-05, "step": 47, "loss": 1.3254079818725586, "percent_done": 47.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3345.174062811403, "avg_wps": 3377.8630147812587, "eta_in_seconds": 1028.2856305304995, "at": "2024-06-07T10:18:55.834972"}
48
+ {"lr": 5.741335273286256e-05, "step": 48, "loss": 1.3447790145874023, "percent_done": 48.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3378.309763056493, "avg_wps": 3377.8723208318374, "eta_in_seconds": 1008.8812353809674, "at": "2024-06-07T10:19:15.235445"}
49
+ {"lr": 5.5774408751551925e-05, "step": 49, "loss": 1.33303701877594, "percent_done": 49.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3341.159660878464, "avg_wps": 3377.11502006828, "eta_in_seconds": 989.7015589159363, "at": "2024-06-07T10:19:34.851677"}
50
+ {"lr": 5.412915075774753e-05, "step": 50, "loss": 1.2656023502349854, "percent_done": 50.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3397.10165187303, "avg_wps": 3377.5124476676, "eta_in_seconds": 970.1814725399017, "at": "2024-06-07T10:19:54.144842"}
51
+ {"lr": 5.247937781691018e-05, "step": 51, "loss": 1.3504137992858887, "percent_done": 51.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3400.231579997644, "avg_wps": 3377.955002338164, "eta_in_seconds": 950.653279211007, "at": "2024-06-07T10:20:13.420242"}
52
+ {"lr": 5.082689393152898e-05, "step": 52, "loss": 1.2954485416412354, "percent_done": 52.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3372.6115714241823, "avg_wps": 3377.852084380988, "eta_in_seconds": 931.2805657019982, "at": "2024-06-07T10:20:32.853545"}
53
+ {"lr": 4.917350606847103e-05, "step": 53, "loss": 1.3525289297103882, "percent_done": 53.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3376.128125430993, "avg_wps": 3377.81954055758, "eta_in_seconds": 911.8876728066857, "at": "2024-06-07T10:20:52.266614"}
54
+ {"lr": 4.752102218308982e-05, "step": 54, "loss": 1.364650011062622, "percent_done": 54.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3334.6514123480847, "avg_wps": 3377.009976236175, "eta_in_seconds": 892.699761390686, "at": "2024-06-07T10:21:11.921216"}
55
+ {"lr": 4.587124924225248e-05, "step": 55, "loss": 1.322001576423645, "percent_done": 55.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3386.6769021443133, "avg_wps": 3377.1852459259503, "eta_in_seconds": 873.2479225288738, "at": "2024-06-07T10:21:31.273874"}
56
+ {"lr": 4.4225991248448094e-05, "step": 56, "loss": 1.3233798742294312, "percent_done": 56.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3376.654459581305, "avg_wps": 3377.175766135051, "eta_in_seconds": 853.8448098897934, "at": "2024-06-07T10:21:50.683943"}
57
+ {"lr": 4.258704726713744e-05, "step": 57, "loss": 1.3856163024902344, "percent_done": 57.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3390.241805633249, "avg_wps": 3377.4041268877554, "eta_in_seconds": 834.3828260187518, "at": "2024-06-07T10:22:10.016167"}
58
+ {"lr": 4.095620945950139e-05, "step": 58, "loss": 1.3582823276519775, "percent_done": 58.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3400.330846298791, "avg_wps": 3377.7967955713593, "eta_in_seconds": 814.8838330383959, "at": "2024-06-07T10:22:29.291068"}
59
+ {"lr": 3.933526112273882e-05, "step": 59, "loss": 1.2976412773132324, "percent_done": 59.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3378.5385546753237, "avg_wps": 3377.809365046108, "eta_in_seconds": 795.4788768735983, "at": "2024-06-07T10:22:48.690234"}
60
+ {"lr": 3.772597474005748e-05, "step": 60, "loss": 1.4162263870239258, "percent_done": 60.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3328.829508777257, "avg_wps": 3376.981225832671, "eta_in_seconds": 776.2672708829244, "at": "2024-06-07T10:23:08.379276"}
61
+ {"lr": 3.613011004248953e-05, "step": 61, "loss": 1.3515819311141968, "percent_done": 61.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3392.678969887042, "avg_wps": 3377.237394631227, "eta_in_seconds": 756.8031800379518, "at": "2024-06-07T10:23:27.697653"}
62
+ {"lr": 3.454941208465151e-05, "step": 62, "loss": 1.3230400085449219, "percent_done": 62.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3371.533382114311, "avg_wps": 3377.1452412965914, "eta_in_seconds": 737.4180919278053, "at": "2024-06-07T10:23:47.137268"}
63
+ {"lr": 3.2985609336552735e-05, "step": 63, "loss": 1.3042376041412354, "percent_done": 63.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3370.2625637461792, "avg_wps": 3377.035772889415, "eta_in_seconds": 718.0356274180942, "at": "2024-06-07T10:24:06.584125"}
64
+ {"lr": 3.144041179353874e-05, "step": 64, "loss": 1.3380427360534668, "percent_done": 64.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3390.0965918984016, "avg_wps": 3377.2390741961703, "eta_in_seconds": 698.5872033834457, "at": "2024-06-07T10:24:25.917352"}
65
+ {"lr": 2.991550910643647e-05, "step": 65, "loss": 1.3672306537628174, "percent_done": 65.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3337.616125276957, "avg_wps": 3376.6223662478556, "eta_in_seconds": 679.3060494202834, "at": "2024-06-07T10:24:45.554554"}
66
+ {"lr": 2.841256873394583e-05, "step": 66, "loss": 1.3195621967315674, "percent_done": 66.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3369.5094623957466, "avg_wps": 3376.5143709305453, "eta_in_seconds": 659.9184114788518, "at": "2024-06-07T10:25:05.005737"}
67
+ {"lr": 2.6933234119298215e-05, "step": 67, "loss": 1.310693621635437, "percent_done": 67.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3408.5397783266653, "avg_wps": 3376.987937463485, "eta_in_seconds": 640.4192256678397, "at": "2024-06-07T10:25:24.234172"}
68
+ {"lr": 2.5479122893175377e-05, "step": 68, "loss": 1.3552899360656738, "percent_done": 68.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3396.188675136809, "avg_wps": 3377.268728222436, "eta_in_seconds": 620.9609506270465, "at": "2024-06-07T10:25:43.532573"}
69
+ {"lr": 2.4051825104854013e-05, "step": 69, "loss": 1.385781168937683, "percent_done": 69.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3376.698055087941, "avg_wps": 3377.260456219796, "eta_in_seconds": 601.5573943248693, "at": "2024-06-07T10:26:02.942492"}
70
+ {"lr": 2.265290148351029e-05, "step": 70, "loss": 1.377860188484192, "percent_done": 70.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3348.694958319102, "avg_wps": 3376.848946778055, "eta_in_seconds": 582.2232593127659, "at": "2024-06-07T10:26:22.514685"}
71
+ {"lr": 2.1283881731585358e-05, "step": 71, "loss": 1.3846116065979004, "percent_done": 71.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3379.497906700583, "avg_wps": 3376.886227240077, "eta_in_seconds": 562.8096039093716, "at": "2024-06-07T10:26:41.908357"}
72
+ {"lr": 1.9946262852078203e-05, "step": 72, "loss": 1.39150869846344, "percent_done": 72.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3371.6095988326842, "avg_wps": 3376.8128276351867, "eta_in_seconds": 543.4141877757179, "at": "2024-06-07T10:27:01.347418"}
73
+ {"lr": 1.8641507511594846e-05, "step": 73, "loss": 1.4072656631469727, "percent_done": 73.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3431.3191237341875, "avg_wps": 3377.5477884804036, "eta_in_seconds": 523.8925133894568, "at": "2024-06-07T10:27:20.448185"}
74
+ {"lr": 1.737104244094387e-05, "step": 74, "loss": 1.3605914115905762, "percent_done": 74.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3326.4248936780564, "avg_wps": 3376.846466725391, "eta_in_seconds": 504.5938619922947, "at": "2024-06-07T10:27:40.151384"}
75
+ {"lr": 1.6136256875027284e-05, "step": 75, "loss": 1.3052914142608643, "percent_done": 75.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3391.702788230919, "avg_wps": 3377.043694882856, "eta_in_seconds": 485.1580696105957, "at": "2024-06-07T10:27:59.475353"}
76
+ {"lr": 1.4938501033732425e-05, "step": 76, "loss": 1.3419865369796753, "percent_done": 76.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3302.108187056243, "avg_wps": 3376.035626983033, "eta_in_seconds": 465.89081804375905, "at": "2024-06-07T10:28:19.323501"}
77
+ {"lr": 1.377908464548651e-05, "step": 77, "loss": 1.2833247184753418, "percent_done": 77.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3330.0516375636657, "avg_wps": 3375.43029433578, "eta_in_seconds": 446.5587698639213, "at": "2024-06-07T10:28:39.005063"}
78
+ {"lr": 1.2659275515087717e-05, "step": 78, "loss": 1.2505719661712646, "percent_done": 78.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3379.305917931613, "avg_wps": 3375.4799255624607, "eta_in_seconds": 427.13689069258857, "at": "2024-06-07T10:28:58.400057"}
79
+ {"lr": 1.1580298137379231e-05, "step": 79, "loss": 1.470693826675415, "percent_done": 79.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3390.9550094512733, "avg_wps": 3375.6749300076544, "eta_in_seconds": 407.6980244057088, "at": "2024-06-07T10:29:17.728215"}
80
+ {"lr": 1.0543332358282206e-05, "step": 80, "loss": 1.3144932985305786, "percent_done": 80.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3384.452777529919, "avg_wps": 3375.7843720741935, "eta_in_seconds": 388.27124470472336, "at": "2024-06-07T10:29:37.093596"}
81
+ {"lr": 9.549512084651507e-06, "step": 81, "loss": 1.331773281097412, "percent_done": 81.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3410.4243444117096, "avg_wps": 3376.207735423671, "eta_in_seconds": 368.8114291473671, "at": "2024-06-07T10:29:56.311519"}
82
+ {"lr": 8.599924044365227e-06, "step": 82, "loss": 1.325313925743103, "percent_done": 82.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3378.092460475999, "avg_wps": 3376.2307072082085, "eta_in_seconds": 349.39792398708624, "at": "2024-06-07T10:30:15.713283"}
83
+ {"lr": 7.695606598003707e-06, "step": 83, "loss": 1.3291065692901611, "percent_done": 83.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3370.9544460748034, "avg_wps": 3376.1670394979496, "eta_in_seconds": 329.99315109884884, "at": "2024-06-07T10:30:35.156170"}
84
+ {"lr": 6.837548603417497e-06, "step": 84, "loss": 1.3459031581878662, "percent_done": 84.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3373.2159964893153, "avg_wps": 3376.13187766516, "eta_in_seconds": 310.58502392541794, "at": "2024-06-07T10:30:54.585958"}
85
+ {"lr": 6.026688334425794e-06, "step": 85, "loss": 1.3451216220855713, "percent_done": 85.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3384.023992479452, "avg_wps": 3376.2245120782222, "eta_in_seconds": 291.16547092269445, "at": "2024-06-07T10:31:13.953838"}
86
+ {"lr": 5.26391245482779e-06, "step": 86, "loss": 1.2905868291854858, "percent_done": 86.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3394.1486760558496, "avg_wps": 3376.431844672881, "eta_in_seconds": 271.73775222135146, "at": "2024-06-07T10:31:33.263786"}
87
+ {"lr": 4.550055048848885e-06, "step": 87, "loss": 1.2742289304733276, "percent_done": 87.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3429.9269185167414, "avg_wps": 3377.037249003244, "eta_in_seconds": 252.28267773814585, "at": "2024-06-07T10:31:52.372327"}
88
+ {"lr": 3.885896709081758e-06, "step": 88, "loss": 1.2421263456344604, "percent_done": 88.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3382.7373314980828, "avg_wps": 3377.101914759315, "eta_in_seconds": 232.87185872684825, "at": "2024-06-07T10:32:11.747446"}
89
+ {"lr": 3.272163682919883e-06, "step": 89, "loss": 1.282299518585205, "percent_done": 89.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3380.617950428434, "avg_wps": 3377.1413801507483, "eta_in_seconds": 213.46337593003605, "at": "2024-06-07T10:32:31.134836"}
90
+ {"lr": 2.7095270784166083e-06, "step": 90, "loss": 1.3170578479766846, "percent_done": 90.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3379.1226743845195, "avg_wps": 3377.1633817666825, "eta_in_seconds": 194.05635023117065, "at": "2024-06-07T10:32:50.530637"}
91
+ {"lr": 2.198602130438425e-06, "step": 91, "loss": 1.34639573097229, "percent_done": 91.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3356.061810026018, "avg_wps": 3376.930054482078, "eta_in_seconds": 174.66278261142773, "at": "2024-06-07T10:33:10.059703"}
92
+ {"lr": 1.7399475279146274e-06, "step": 92, "loss": 1.345885992050171, "percent_done": 92.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3392.6308572049215, "avg_wps": 3377.099934122856, "eta_in_seconds": 155.24799686929455, "at": "2024-06-07T10:33:29.378281"}
93
+ {"lr": 1.3340648029191625e-06, "step": 93, "loss": 1.3385608196258545, "percent_done": 93.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3318.9010140618975, "avg_wps": 3376.463285638763, "eta_in_seconds": 135.8676109262692, "at": "2024-06-07T10:33:49.126111"}
94
+ {"lr": 9.813977822527072e-07, "step": 94, "loss": 1.3023641109466553, "percent_done": 94.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3416.9709508268925, "avg_wps": 3376.889163317068, "eta_in_seconds": 116.44326508298835, "at": "2024-06-07T10:34:08.307373"}
95
+ {"lr": 6.823321021245645e-07, "step": 95, "loss": 1.3978477716445923, "percent_done": 95.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3426.0643738566664, "avg_wps": 3377.399444473935, "eta_in_seconds": 97.02139334929618, "at": "2024-06-07T10:34:27.437513"}
96
+ {"lr": 4.3719478646510564e-07, "step": 96, "loss": 1.277753233909607, "percent_done": 96.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3377.004209382784, "avg_wps": 3377.395326964909, "eta_in_seconds": 77.61720930536588, "at": "2024-06-07T10:34:46.845476"}
97
+ {"lr": 2.462538893299382e-07, "step": 97, "loss": 1.285733938217163, "percent_done": 97.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3332.4240092794557, "avg_wps": 3376.92551388342, "eta_in_seconds": 58.22100582073645, "at": "2024-06-07T10:35:06.513080"}
98
+ {"lr": 1.0971820178671702e-07, "step": 98, "loss": 1.187045931816101, "percent_done": 98.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3410.7539123182646, "avg_wps": 3377.267312587766, "eta_in_seconds": 38.81007568203673, "at": "2024-06-07T10:35:25.729042"}
99
+ {"lr": 2.773702360516574e-08, "step": 99, "loss": 1.3289601802825928, "percent_done": 99.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3374.170753833146, "avg_wps": 3377.2360058017957, "eta_in_seconds": 19.40521772461708, "at": "2024-06-07T10:35:45.153363"}
100
+ {"lr": 3.9999999999999996e-10, "step": 100, "loss": 1.27885103225708, "percent_done": 100.0, "peak_allocated_mem": 22.22185754776001, "allocated_mem": 17.089967727661133, "wps": 3381.6537409694038, "avg_wps": 3377.280126017307, "eta_in_seconds": 0.0, "at": "2024-06-07T10:36:04.535381"}
governmentgpt/tb/events.out.tfevents.1717754616.15dc9f3a59ce.21751.0.train ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2096476673e5404f0d167c932dcfc993b4aefa722342cd448ce19f0e40e81d3
3
+ size 42588
governmentgpt/tb/events.out.tfevents.1717754616.15dc9f3a59ce.21751.1.eval ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9115f2568491af50087f0315d31bdd6aa60296f3fb0fee49e3f72029e7a0dd4
3
+ size 88