narySt commited on
Commit
8a3033b
·
verified ·
1 Parent(s): f3e2c85

Add files using upload-large-folder tool

Browse files
Files changed (50) hide show
  1. pythia1b_v5_04_21/.hydra/config.yaml +47 -0
  2. pythia1b_v5_04_21/.hydra/hydra.yaml +160 -0
  3. pythia1b_v5_04_21/.hydra/overrides.yaml +1 -0
  4. pythia1b_v5_04_21/eval_results/eval_config.yaml +29 -0
  5. pythia1b_v5_04_21/eval_results/metrics_checkpoint_latest.txt +17 -0
  6. pythia1b_v5_04_21/eval_results/metrics_checkpoint_step_10591.txt +17 -0
  7. pythia1b_v5_04_21/eval_results/metrics_checkpoint_step_12000.txt +17 -0
  8. pythia1b_v5_04_21/eval_results/metrics_checkpoint_step_15000.txt +17 -0
  9. pythia1b_v5_04_21/eval_results/metrics_checkpoint_step_18000.txt +17 -0
  10. pythia1b_v5_04_21/eval_results/metrics_checkpoint_step_21000.txt +17 -0
  11. pythia1b_v5_04_21/eval_results/metrics_checkpoint_step_21182.txt +17 -0
  12. pythia1b_v5_04_21/eval_results/metrics_checkpoint_step_24000.txt +17 -0
  13. pythia1b_v5_04_21/eval_results/metrics_checkpoint_step_27000.txt +17 -0
  14. pythia1b_v5_04_21/eval_results/metrics_checkpoint_step_3000.txt +17 -0
  15. pythia1b_v5_04_21/eval_results/metrics_checkpoint_step_30000.txt +17 -0
  16. pythia1b_v5_04_21/eval_results/metrics_checkpoint_step_31773.txt +17 -0
  17. pythia1b_v5_04_21/eval_results/metrics_checkpoint_step_6000.txt +17 -0
  18. pythia1b_v5_04_21/eval_results/metrics_checkpoint_step_9000.txt +17 -0
  19. pythia1b_v5_04_21/eval_results/metrics_initial_checkpoint.txt +17 -0
  20. pythia1b_v5_04_21/eval_results/metrics_model_best.txt +17 -0
  21. pythia1b_v5_04_21/eval_results/metrics_model_final.txt +17 -0
  22. pythia1b_v5_04_21/eval_results/predictions_checkpoint_latest.txt +0 -0
  23. pythia1b_v5_04_21/eval_results/predictions_checkpoint_step_10591.txt +0 -0
  24. pythia1b_v5_04_21/eval_results/predictions_checkpoint_step_12000.txt +0 -0
  25. pythia1b_v5_04_21/eval_results/predictions_checkpoint_step_15000.txt +0 -0
  26. pythia1b_v5_04_21/eval_results/predictions_checkpoint_step_18000.txt +0 -0
  27. pythia1b_v5_04_21/eval_results/predictions_checkpoint_step_21000.txt +0 -0
  28. pythia1b_v5_04_21/eval_results/predictions_checkpoint_step_21182.txt +0 -0
  29. pythia1b_v5_04_21/eval_results/predictions_checkpoint_step_24000.txt +0 -0
  30. pythia1b_v5_04_21/eval_results/predictions_checkpoint_step_27000.txt +0 -0
  31. pythia1b_v5_04_21/eval_results/predictions_checkpoint_step_3000.txt +0 -0
  32. pythia1b_v5_04_21/eval_results/predictions_checkpoint_step_30000.txt +0 -0
  33. pythia1b_v5_04_21/eval_results/predictions_checkpoint_step_31773.txt +0 -0
  34. pythia1b_v5_04_21/eval_results/predictions_checkpoint_step_6000.txt +0 -0
  35. pythia1b_v5_04_21/eval_results/predictions_checkpoint_step_9000.txt +0 -0
  36. pythia1b_v5_04_21/eval_results/predictions_initial_checkpoint.txt +0 -0
  37. pythia1b_v5_04_21/eval_results/predictions_model_best.txt +0 -0
  38. pythia1b_v5_04_21/eval_results/predictions_model_final.txt +0 -0
  39. pythia1b_v5_04_21/eval_results/summary.txt +17 -0
  40. pythia1b_v5_04_21/train.log +0 -0
  41. pythia1b_v5_04_21/wandb/debug-internal.log +13 -0
  42. pythia1b_v5_04_21/wandb/debug.log +24 -0
  43. pythia1b_v5_04_21/wandb/run-20260421_202839-8ing6xdi/files/config.yaml +126 -0
  44. pythia1b_v5_04_21/wandb/run-20260421_202839-8ing6xdi/files/output.log +0 -0
  45. pythia1b_v5_04_21/wandb/run-20260421_202839-8ing6xdi/files/requirements.txt +245 -0
  46. pythia1b_v5_04_21/wandb/run-20260421_202839-8ing6xdi/files/wandb-metadata.json +47 -0
  47. pythia1b_v5_04_21/wandb/run-20260421_202839-8ing6xdi/files/wandb-summary.json +1 -0
  48. pythia1b_v5_04_21/wandb/run-20260421_202839-8ing6xdi/logs/debug-core.log +16 -0
  49. pythia1b_v5_04_21/wandb/run-20260421_202839-8ing6xdi/logs/debug-internal.log +13 -0
  50. pythia1b_v5_04_21/wandb/run-20260421_202839-8ing6xdi/logs/debug.log +24 -0
pythia1b_v5_04_21/.hydra/config.yaml ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ name: EleutherAI/pythia-1b
3
+ checkpoint_path: null
4
+ from_scratch: false
5
+ training:
6
+ epochs: 3
7
+ batch_size: 4
8
+ eval_batch_size: 12
9
+ gradient_accumulation_steps: 4
10
+ lr: 2.0e-05
11
+ weight_decay: 0.1
12
+ betas:
13
+ - 0.9
14
+ - 0.95
15
+ eps: 1.0e-08
16
+ lr_scheduler: wsd
17
+ warmup_ratio: 0.1
18
+ decay_ratio: 0.2
19
+ warmup_steps: 100
20
+ min_lr_ratio: 0.1
21
+ max_grad_norm: 1.0
22
+ use_amp: true
23
+ resume: false
24
+ resume_checkpoint: null
25
+ data:
26
+ path: ${oc.env:PROJECT_ROOT}/code_completion_exp/datasets/data_V5_full
27
+ max_context_len: 4096
28
+ max_target_len: 256
29
+ num_workers: 4
30
+ pin_memory: true
31
+ logging:
32
+ log_interval: 10
33
+ save_interval: 3000
34
+ eval_interval: 1000
35
+ save_every_epoch: true
36
+ tracking:
37
+ enabled: true
38
+ backend: wandb
39
+ project: code-completion-full-docstring
40
+ run_name: pythia_train
41
+ entity: null
42
+ base_url: https://wandb.platun0v.ru
43
+ local_dir: ${paths.output_dir}
44
+ paths:
45
+ output_dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}
46
+ seed: 42
47
+ device: cuda
pythia1b_v5_04_21/.hydra/hydra.yaml ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: ${paths.output_dir}
4
+ sweep:
5
+ dir: outputs/multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
6
+ subdir: ${hydra.job.num}
7
+ launcher:
8
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
9
+ sweeper:
10
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
11
+ max_batch_size: null
12
+ params: null
13
+ help:
14
+ app_name: ${hydra.job.name}
15
+ header: '${hydra.help.app_name} is powered by Hydra.
16
+
17
+ '
18
+ footer: 'Powered by Hydra (https://hydra.cc)
19
+
20
+ Use --hydra-help to view Hydra specific help
21
+
22
+ '
23
+ template: '${hydra.help.header}
24
+
25
+ == Configuration groups ==
26
+
27
+ Compose your configuration from those groups (group=option)
28
+
29
+
30
+ $APP_CONFIG_GROUPS
31
+
32
+
33
+ == Config ==
34
+
35
+ Override anything in the config (foo.bar=value)
36
+
37
+
38
+ $CONFIG
39
+
40
+
41
+ ${hydra.help.footer}
42
+
43
+ '
44
+ hydra_help:
45
+ template: 'Hydra (${hydra.runtime.version})
46
+
47
+ See https://hydra.cc for more info.
48
+
49
+
50
+ == Flags ==
51
+
52
+ $FLAGS_HELP
53
+
54
+
55
+ == Configuration groups ==
56
+
57
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
58
+ to command line)
59
+
60
+
61
+ $HYDRA_CONFIG_GROUPS
62
+
63
+
64
+ Use ''--cfg hydra'' to Show the Hydra config.
65
+
66
+ '
67
+ hydra_help: ???
68
+ hydra_logging:
69
+ version: 1
70
+ formatters:
71
+ simple:
72
+ format: '[%(asctime)s][HYDRA] %(message)s'
73
+ handlers:
74
+ console:
75
+ class: logging.StreamHandler
76
+ formatter: simple
77
+ stream: ext://sys.stdout
78
+ root:
79
+ level: INFO
80
+ handlers:
81
+ - console
82
+ loggers:
83
+ logging_example:
84
+ level: DEBUG
85
+ disable_existing_loggers: false
86
+ job_logging:
87
+ version: 1
88
+ formatters:
89
+ simple:
90
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
91
+ handlers:
92
+ console:
93
+ class: logging.StreamHandler
94
+ formatter: simple
95
+ stream: ext://sys.stdout
96
+ file:
97
+ class: logging.FileHandler
98
+ formatter: simple
99
+ filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
100
+ root:
101
+ level: INFO
102
+ handlers:
103
+ - console
104
+ - file
105
+ disable_existing_loggers: false
106
+ env: {}
107
+ mode: RUN
108
+ searchpath: []
109
+ callbacks: {}
110
+ output_subdir: .hydra
111
+ overrides:
112
+ hydra:
113
+ - hydra.mode=RUN
114
+ task: []
115
+ job:
116
+ name: train
117
+ chdir: false
118
+ override_dirname: ''
119
+ id: ???
120
+ num: ???
121
+ config_name: config
122
+ env_set: {}
123
+ env_copy: []
124
+ config:
125
+ override_dirname:
126
+ kv_sep: '='
127
+ item_sep: ','
128
+ exclude_keys: []
129
+ runtime:
130
+ version: 1.3.2
131
+ version_base: '1.3'
132
+ cwd: /home/test/byte-llms-code/code_completion_exp/train_pythia
133
+ config_sources:
134
+ - path: hydra.conf
135
+ schema: pkg
136
+ provider: hydra
137
+ - path: /home/test/byte-llms-code/code_completion_exp/train_pythia/configs
138
+ schema: file
139
+ provider: main
140
+ - path: ''
141
+ schema: structured
142
+ provider: schema
143
+ output_dir: /home/test/byte-llms-code/code_completion_exp/train_pythia/outputs/2026-04-21/20-28-37
144
+ choices:
145
+ paths: default
146
+ tracking: wandb
147
+ logging: default
148
+ data: default
149
+ training: default
150
+ model: pythia_1b
151
+ hydra/env: default
152
+ hydra/callbacks: null
153
+ hydra/job_logging: default
154
+ hydra/hydra_logging: default
155
+ hydra/hydra_help: default
156
+ hydra/help: default
157
+ hydra/sweeper: basic
158
+ hydra/launcher: basic
159
+ hydra/output: default
160
+ verbose: false
pythia1b_v5_04_21/.hydra/overrides.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+ []
pythia1b_v5_04_21/eval_results/eval_config.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data:
2
+ path: /home/test/byte-llms-code/code_completion_exp/datasets/data_V5_full
3
+ max_context_len: 4096
4
+ max_target_len: 256
5
+ num_workers: 4
6
+ pin_memory: true
7
+ model:
8
+ name: EleutherAI/pythia-1b
9
+ checkpoint_path: null
10
+ from_scratch: false
11
+ paths:
12
+ checkpoints_dir: outputs/2026-04-21/20-28-37
13
+ initial_checkpoint: auto
14
+ output_dir: outputs/2026-04-21/20-28-37/eval_results
15
+ evaluation:
16
+ batch_size: 16
17
+ max_samples: null
18
+ compute_perplexity: true
19
+ bleu_tokenize: none
20
+ save_predictions: true
21
+ use_amp: true
22
+ generation:
23
+ max_new_tokens: 64
24
+ temperature: 0.1
25
+ top_k: 0
26
+ top_p: 1.0
27
+ do_sample: true
28
+ seed: 42
29
+ device: cuda
pythia1b_v5_04_21/eval_results/metrics_checkpoint_latest.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_latest.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.30150031921685466
5
+ token_accuracy: 0.3239941940977367
6
+ bleu: 16.280810769581016
7
+ perplexity: 710.7840855707159
8
+ num_samples: 37592
9
+ gen_wall_time_s: 681.0312011489659
10
+ gen_samples_per_s: 55.19864572515714
11
+ gen_time_per_sample_ms: 18.11638649576947
12
+ gen_chars_per_s: 1422.6602222708914
13
+ gen_batch_mean_ms: 822.0316826109782
14
+ gen_batch_p50_ms: 656.0139355024148
15
+ gen_batch_p95_ms: 1683.2532198521196
16
+ gen_batch_max_ms: 5435.805578999862
17
+ gen_num_batches: 784
pythia1b_v5_04_21/eval_results/metrics_checkpoint_step_10591.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_step_10591.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.29402532453713554
5
+ token_accuracy: 0.3131460051089516
6
+ bleu: 15.46627135309508
7
+ perplexity: 573.579940871295
8
+ num_samples: 37592
9
+ gen_wall_time_s: 697.1238539060578
10
+ gen_samples_per_s: 53.924420731507176
11
+ gen_time_per_sample_ms: 18.544473662110498
12
+ gen_chars_per_s: 1355.0088620655529
13
+ gen_batch_mean_ms: 852.3167421757888
14
+ gen_batch_p50_ms: 675.3437760016823
15
+ gen_batch_p95_ms: 1752.8990025475644
16
+ gen_batch_max_ms: 5515.446357996552
17
+ gen_num_batches: 784
pythia1b_v5_04_21/eval_results/metrics_checkpoint_step_12000.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_step_12000.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.29288146414130667
5
+ token_accuracy: 0.3176458823850259
6
+ bleu: 15.78001507522994
7
+ perplexity: 743.1278507940478
8
+ num_samples: 37592
9
+ gen_wall_time_s: 678.8557107109154
10
+ gen_samples_per_s: 55.375537698037
11
+ gen_time_per_sample_ms: 18.058515394523184
12
+ gen_chars_per_s: 1426.102756042461
13
+ gen_batch_mean_ms: 840.9376154973704
14
+ gen_batch_p50_ms: 666.0685060014657
15
+ gen_batch_p95_ms: 1755.3121661472678
16
+ gen_batch_max_ms: 5514.503185004287
17
+ gen_num_batches: 784
pythia1b_v5_04_21/eval_results/metrics_checkpoint_step_15000.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_step_15000.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.30064907427112153
5
+ token_accuracy: 0.3238335924171765
6
+ bleu: 16.369268121332667
7
+ perplexity: 648.3400079564792
8
+ num_samples: 37592
9
+ gen_wall_time_s: 682.9705825539422
10
+ gen_samples_per_s: 55.04190218475613
11
+ gen_time_per_sample_ms: 18.167976765108058
12
+ gen_chars_per_s: 1415.311324809008
13
+ gen_batch_mean_ms: 816.9435996159968
14
+ gen_batch_p50_ms: 650.5265364976367
15
+ gen_batch_p95_ms: 1649.2040568959048
16
+ gen_batch_max_ms: 5439.02037099906
17
+ gen_num_batches: 784
pythia1b_v5_04_21/eval_results/metrics_checkpoint_step_18000.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_step_18000.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.3029367950627793
5
+ token_accuracy: 0.32487901845101197
6
+ bleu: 16.435138411892126
7
+ perplexity: 540.7501954984347
8
+ num_samples: 37592
9
+ gen_wall_time_s: 665.5543457790118
10
+ gen_samples_per_s: 56.48223956226996
11
+ gen_time_per_sample_ms: 17.70468040484709
12
+ gen_chars_per_s: 1447.5887748464947
13
+ gen_batch_mean_ms: 566.4292304502227
14
+ gen_batch_p50_ms: 467.9037870009779
15
+ gen_batch_p95_ms: 1174.899779098632
16
+ gen_batch_max_ms: 3922.1453899954213
17
+ gen_num_batches: 1175
pythia1b_v5_04_21/eval_results/metrics_checkpoint_step_21000.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_step_21000.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.301393913598638
5
+ token_accuracy: 0.32462144971803797
6
+ bleu: 16.368019126858044
7
+ perplexity: 541.3429525210204
8
+ num_samples: 37592
9
+ gen_wall_time_s: 671.3920430750659
10
+ gen_samples_per_s: 55.991131244009956
11
+ gen_time_per_sample_ms: 17.859971352284152
12
+ gen_chars_per_s: 1442.0873913927937
13
+ gen_batch_mean_ms: 571.3974834681411
14
+ gen_batch_p50_ms: 470.4890410066582
15
+ gen_batch_p95_ms: 1157.3573702989959
16
+ gen_batch_max_ms: 3927.851617001579
17
+ gen_num_batches: 1175
pythia1b_v5_04_21/eval_results/metrics_checkpoint_step_21182.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_step_21182.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.3023249627580336
5
+ token_accuracy: 0.3244426667151502
6
+ bleu: 16.474168997905824
7
+ perplexity: 542.5514603476134
8
+ num_samples: 37592
9
+ gen_wall_time_s: 670.8535084038012
10
+ gen_samples_per_s: 56.0360787103055
11
+ gen_time_per_sample_ms: 17.8456455736274
12
+ gen_chars_per_s: 1438.8566026832016
13
+ gen_batch_mean_ms: 569.6230980945426
14
+ gen_batch_p50_ms: 471.2959009993938
15
+ gen_batch_p95_ms: 1174.0064269950378
16
+ gen_batch_max_ms: 3921.823967997625
17
+ gen_num_batches: 1175
pythia1b_v5_04_21/eval_results/metrics_checkpoint_step_24000.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_step_24000.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.3016599276441796
5
+ token_accuracy: 0.3251093152004945
6
+ bleu: 16.409958897349515
7
+ perplexity: 566.5285793566808
8
+ num_samples: 37592
9
+ gen_wall_time_s: 676.4614610540084
10
+ gen_samples_per_s: 55.57153239953558
11
+ gen_time_per_sample_ms: 17.994824990796136
12
+ gen_chars_per_s: 1434.8740554822307
13
+ gen_batch_mean_ms: 575.7118817480923
14
+ gen_batch_p50_ms: 472.5785279952106
15
+ gen_batch_p95_ms: 1164.6154464004212
16
+ gen_batch_max_ms: 3927.8858819961897
17
+ gen_num_batches: 1175
pythia1b_v5_04_21/eval_results/metrics_checkpoint_step_27000.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_step_27000.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.3014737178123005
5
+ token_accuracy: 0.3247578096354948
6
+ bleu: 16.419003802997334
7
+ perplexity: 572.8302545454646
8
+ num_samples: 37592
9
+ gen_wall_time_s: 670.9174906390836
10
+ gen_samples_per_s: 56.03073481389146
11
+ gen_time_per_sample_ms: 17.847347590952424
12
+ gen_chars_per_s: 1447.2554577092376
13
+ gen_batch_mean_ms: 569.2063897753015
14
+ gen_batch_p50_ms: 469.24721299728844
15
+ gen_batch_p95_ms: 1157.0710574953407
16
+ gen_batch_max_ms: 3923.8531240043812
17
+ gen_num_batches: 1175
pythia1b_v5_04_21/eval_results/metrics_checkpoint_step_3000.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_step_3000.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.28325175569270056
5
+ token_accuracy: 0.3066916356826632
6
+ bleu: 14.588953791669732
7
+ perplexity: 527.8538214628118
8
+ num_samples: 37592
9
+ gen_wall_time_s: 689.2576722488011
10
+ gen_samples_per_s: 54.539835410682855
11
+ gen_time_per_sample_ms: 18.335222181549295
12
+ gen_chars_per_s: 1389.1713339599544
13
+ gen_batch_mean_ms: 586.6022742542988
14
+ gen_batch_p50_ms: 484.3167610015371
15
+ gen_batch_p95_ms: 1206.365606199688
16
+ gen_batch_max_ms: 3868.3884359998046
17
+ gen_num_batches: 1175
pythia1b_v5_04_21/eval_results/metrics_checkpoint_step_30000.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_step_30000.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.3011278995530964
5
+ token_accuracy: 0.3247790211782103
6
+ bleu: 16.428889682390672
7
+ perplexity: 576.8903125233649
8
+ num_samples: 37592
9
+ gen_wall_time_s: 668.1773826373537
10
+ gen_samples_per_s: 56.26050952461326
11
+ gen_time_per_sample_ms: 17.774456869476317
12
+ gen_chars_per_s: 1450.5145268079564
13
+ gen_batch_mean_ms: 568.6616022445563
14
+ gen_batch_p50_ms: 472.4465320032323
15
+ gen_batch_p95_ms: 1174.9766881941466
16
+ gen_batch_max_ms: 3922.6198899996234
17
+ gen_num_batches: 1175
pythia1b_v5_04_21/eval_results/metrics_checkpoint_step_31773.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_step_31773.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.3014737178123005
5
+ token_accuracy: 0.3244669084782536
6
+ bleu: 16.1888577028866
7
+ perplexity: 576.6120524185981
8
+ num_samples: 37592
9
+ gen_wall_time_s: 675.8306533855648
10
+ gen_samples_per_s: 55.62340182660163
11
+ gen_time_per_sample_ms: 17.978044620812
12
+ gen_chars_per_s: 1434.6759726609196
13
+ gen_batch_mean_ms: 567.6975614385199
14
+ gen_batch_p50_ms: 471.66150900011417
15
+ gen_batch_p95_ms: 1157.574844303599
16
+ gen_batch_max_ms: 3920.68699200172
17
+ gen_num_batches: 1175
pythia1b_v5_04_21/eval_results/metrics_checkpoint_step_6000.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_step_6000.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.2844488188976378
5
+ token_accuracy: 0.3113612053004615
6
+ bleu: 15.080319950709026
7
+ perplexity: 453.1818960521432
8
+ num_samples: 37592
9
+ gen_wall_time_s: 674.0668778871186
10
+ gen_samples_per_s: 55.76894701877827
11
+ gen_time_per_sample_ms: 17.93112571523512
12
+ gen_chars_per_s: 1428.3389847279113
13
+ gen_batch_mean_ms: 569.0444115582889
14
+ gen_batch_p50_ms: 480.34932199516334
15
+ gen_batch_p95_ms: 1182.4054761891603
16
+ gen_batch_max_ms: 3867.8216240077745
17
+ gen_num_batches: 1175
pythia1b_v5_04_21/eval_results/metrics_checkpoint_step_9000.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_step_9000.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.2909927644179613
5
+ token_accuracy: 0.3142671866524852
6
+ bleu: 15.966688789870538
7
+ perplexity: 457.0689719416982
8
+ num_samples: 37592
9
+ gen_wall_time_s: 681.9195306949405
10
+ gen_samples_per_s: 55.12673901815101
11
+ gen_time_per_sample_ms: 18.140017309399354
12
+ gen_chars_per_s: 1398.1063117937088
13
+ gen_batch_mean_ms: 570.5794102875913
14
+ gen_batch_p50_ms: 473.2587530015735
15
+ gen_batch_p95_ms: 1157.3061183065875
16
+ gen_batch_max_ms: 3924.258289000136
17
+ gen_num_batches: 1175
pythia1b_v5_04_21/eval_results/metrics_initial_checkpoint.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: initial_checkpoint
2
+ ================================================================================
3
+
4
+ exact_match: 0.0
5
+ token_accuracy: 0.25446275707632215
6
+ bleu: 0.8505902631105438
7
+ perplexity: 1221.5743555150027
8
+ num_samples: 37592
9
+ gen_wall_time_s: 1353.5502136530558
10
+ gen_samples_per_s: 27.772889118419986
11
+ gen_time_per_sample_ms: 36.00633681775526
12
+ gen_chars_per_s: 6264.1983389124025
13
+ gen_batch_mean_ms: 1691.747248192611
14
+ gen_batch_p50_ms: 1425.062906499079
15
+ gen_batch_p95_ms: 3282.010366447497
16
+ gen_batch_max_ms: 5511.020823003491
17
+ gen_num_batches: 784
pythia1b_v5_04_21/eval_results/metrics_model_best.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: model_best.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.30075547988933815
5
+ token_accuracy: 0.32501840858885667
6
+ bleu: 16.37759776617732
7
+ perplexity: 541.3429525210204
8
+ num_samples: 37592
9
+ gen_wall_time_s: 676.7071885522455
10
+ gen_samples_per_s: 55.55135313461753
11
+ gen_time_per_sample_ms: 18.001361687386826
12
+ gen_chars_per_s: 1433.1826473942099
13
+ gen_batch_mean_ms: 572.0854987192062
14
+ gen_batch_p50_ms: 468.9722350012744
15
+ gen_batch_p95_ms: 1183.3017533019301
16
+ gen_batch_max_ms: 3921.211963010137
17
+ gen_num_batches: 1175
pythia1b_v5_04_21/eval_results/metrics_model_final.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: model_final.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.3001702489891466
5
+ token_accuracy: 0.3246911447869604
6
+ bleu: 16.324443990793565
7
+ perplexity: 576.6120524185981
8
+ num_samples: 37592
9
+ gen_wall_time_s: 666.1244535958976
10
+ gen_samples_per_s: 56.43389879634275
11
+ gen_time_per_sample_ms: 17.71984607352356
12
+ gen_chars_per_s: 1452.986742605242
13
+ gen_batch_mean_ms: 565.5749287258368
14
+ gen_batch_p50_ms: 468.90689300198574
15
+ gen_batch_p95_ms: 1165.2756220020817
16
+ gen_batch_max_ms: 3921.405152999796
17
+ gen_num_batches: 1175
pythia1b_v5_04_21/eval_results/predictions_checkpoint_latest.txt ADDED
The diff for this file is too large to render. See raw diff
 
pythia1b_v5_04_21/eval_results/predictions_checkpoint_step_10591.txt ADDED
The diff for this file is too large to render. See raw diff
 
pythia1b_v5_04_21/eval_results/predictions_checkpoint_step_12000.txt ADDED
The diff for this file is too large to render. See raw diff
 
pythia1b_v5_04_21/eval_results/predictions_checkpoint_step_15000.txt ADDED
The diff for this file is too large to render. See raw diff
 
pythia1b_v5_04_21/eval_results/predictions_checkpoint_step_18000.txt ADDED
The diff for this file is too large to render. See raw diff
 
pythia1b_v5_04_21/eval_results/predictions_checkpoint_step_21000.txt ADDED
The diff for this file is too large to render. See raw diff
 
pythia1b_v5_04_21/eval_results/predictions_checkpoint_step_21182.txt ADDED
The diff for this file is too large to render. See raw diff
 
pythia1b_v5_04_21/eval_results/predictions_checkpoint_step_24000.txt ADDED
The diff for this file is too large to render. See raw diff
 
pythia1b_v5_04_21/eval_results/predictions_checkpoint_step_27000.txt ADDED
The diff for this file is too large to render. See raw diff
 
pythia1b_v5_04_21/eval_results/predictions_checkpoint_step_3000.txt ADDED
The diff for this file is too large to render. See raw diff
 
pythia1b_v5_04_21/eval_results/predictions_checkpoint_step_30000.txt ADDED
The diff for this file is too large to render. See raw diff
 
pythia1b_v5_04_21/eval_results/predictions_checkpoint_step_31773.txt ADDED
The diff for this file is too large to render. See raw diff
 
pythia1b_v5_04_21/eval_results/predictions_checkpoint_step_6000.txt ADDED
The diff for this file is too large to render. See raw diff
 
pythia1b_v5_04_21/eval_results/predictions_checkpoint_step_9000.txt ADDED
The diff for this file is too large to render. See raw diff
 
pythia1b_v5_04_21/eval_results/predictions_initial_checkpoint.txt ADDED
The diff for this file is too large to render. See raw diff
 
pythia1b_v5_04_21/eval_results/predictions_model_best.txt ADDED
The diff for this file is too large to render. See raw diff
 
pythia1b_v5_04_21/eval_results/predictions_model_final.txt ADDED
The diff for this file is too large to render. See raw diff
 
pythia1b_v5_04_21/eval_results/summary.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ EVALUATION SUMMARY
2
+ ==================================================================================================
3
+
4
+ Checkpoint Exact Match Token Acc BLEU PERPLEXITY ms/sample samp/s
5
+ --------------------------------------------------------------------------------------------------
6
+ checkpoint_step_18000 30.29% 32.49% 16.44 540.75 17.7 56.48
7
+ checkpoint_step_21000 30.14% 32.46% 16.37 541.34 17.9 55.99
8
+ checkpoint_step_21182 30.23% 32.44% 16.47 542.55 17.8 56.04
9
+ checkpoint_step_24000 30.17% 32.51% 16.41 566.53 18.0 55.57
10
+ checkpoint_step_27000 30.15% 32.48% 16.42 572.83 17.8 56.03
11
+ checkpoint_step_3000 28.33% 30.67% 14.59 527.85 18.3 54.54
12
+ checkpoint_step_30000 30.11% 32.48% 16.43 576.89 17.8 56.26
13
+ checkpoint_step_31773 30.15% 32.45% 16.19 576.61 18.0 55.62
14
+ checkpoint_step_6000 28.44% 31.14% 15.08 453.18 17.9 55.77
15
+ checkpoint_step_9000 29.10% 31.43% 15.97 457.07 18.1 55.13
16
+ model_best 30.08% 32.50% 16.38 541.34 18.0 55.55
17
+ model_final 30.02% 32.47% 16.32 576.61 17.7 56.43
pythia1b_v5_04_21/train.log ADDED
The diff for this file is too large to render. See raw diff
 
pythia1b_v5_04_21/wandb/debug-internal.log ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-04-21T20:28:39.540287752Z","level":"INFO","msg":"stream: starting","core version":"0.24.0"}
2
+ {"time":"2026-04-21T20:28:39.779234937Z","level":"INFO","msg":"stream: created new stream","id":"8ing6xdi"}
3
+ {"time":"2026-04-21T20:28:39.779334686Z","level":"INFO","msg":"handler: started","stream_id":"8ing6xdi"}
4
+ {"time":"2026-04-21T20:28:39.779468186Z","level":"INFO","msg":"stream: started","id":"8ing6xdi"}
5
+ {"time":"2026-04-21T20:28:39.779542005Z","level":"INFO","msg":"sender: started","stream_id":"8ing6xdi"}
6
+ {"time":"2026-04-21T20:28:39.779545215Z","level":"INFO","msg":"writer: started","stream_id":"8ing6xdi"}
7
+ {"time":"2026-04-21T20:28:39.866766122Z","level":"ERROR","msg":"git repo not found","error":"repository does not exist"}
8
+ {"time":"2026-04-22T03:07:37.46900739Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
+ {"time":"2026-04-22T03:07:37.532953738Z","level":"INFO","msg":"handler: operation stats","stats":{}}
10
+ {"time":"2026-04-22T03:07:37.536692894Z","level":"INFO","msg":"stream: closing","id":"8ing6xdi"}
11
+ {"time":"2026-04-22T03:07:37.536702644Z","level":"INFO","msg":"handler: closed","stream_id":"8ing6xdi"}
12
+ {"time":"2026-04-22T03:07:37.536758784Z","level":"INFO","msg":"sender: closed","stream_id":"8ing6xdi"}
13
+ {"time":"2026-04-22T03:07:37.536765384Z","level":"INFO","msg":"stream: closed","id":"8ing6xdi"}
pythia1b_v5_04_21/wandb/debug.log ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-04-21 20:28:39,221 INFO MainThread:13721 [wandb_setup.py:_flush():81] Current SDK version is 0.24.0
2
+ 2026-04-21 20:28:39,221 INFO MainThread:13721 [wandb_setup.py:_flush():81] Configure stats pid to 13721
3
+ 2026-04-21 20:28:39,221 INFO MainThread:13721 [wandb_setup.py:_flush():81] Loading settings from environment variables
4
+ 2026-04-21 20:28:39,222 INFO MainThread:13721 [wandb_init.py:setup_run_log_directory():717] Logging user logs to outputs/2026-04-21/20-28-37/wandb/run-20260421_202839-8ing6xdi/logs/debug.log
5
+ 2026-04-21 20:28:39,222 INFO MainThread:13721 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to outputs/2026-04-21/20-28-37/wandb/run-20260421_202839-8ing6xdi/logs/debug-internal.log
6
+ 2026-04-21 20:28:39,222 INFO MainThread:13721 [wandb_init.py:init():844] calling init triggers
7
+ 2026-04-21 20:28:39,222 INFO MainThread:13721 [wandb_init.py:init():849] wandb.init called with sweep_config: {}
8
+ config: {'model': {'name': 'EleutherAI/pythia-1b', 'checkpoint_path': None, 'from_scratch': False}, 'training': {'epochs': 3, 'batch_size': 4, 'eval_batch_size': 12, 'gradient_accumulation_steps': 4, 'lr': 2e-05, 'weight_decay': 0.1, 'betas': [0.9, 0.95], 'eps': 1e-08, 'lr_scheduler': 'wsd', 'warmup_ratio': 0.1, 'decay_ratio': 0.2, 'warmup_steps': 100, 'min_lr_ratio': 0.1, 'max_grad_norm': 1.0, 'use_amp': True, 'resume': False, 'resume_checkpoint': None}, 'data': {'path': '/home/test/byte-llms-code/code_completion_exp/datasets/data_V5_full', 'max_context_len': 4096, 'max_target_len': 256, 'num_workers': 4, 'pin_memory': True}, 'logging': {'log_interval': 10, 'save_interval': 3000, 'eval_interval': 1000, 'save_every_epoch': True}, 'tracking': {'enabled': True, 'backend': 'wandb', 'project': 'code-completion-full-docstring', 'run_name': 'pythia_train', 'entity': None, 'base_url': 'https://wandb.platun0v.ru', 'local_dir': 'outputs/2026-04-21/20-28-37'}, 'paths': {'output_dir': 'outputs/2026-04-21/20-28-37'}, 'seed': 42, 'device': 'cuda', '_wandb': {'code_path': 'code/code_completion_exp/train_pythia/train.py'}}
9
+ 2026-04-21 20:28:39,222 INFO MainThread:13721 [wandb_init.py:init():892] starting backend
10
+ 2026-04-21 20:28:39,507 INFO MainThread:13721 [wandb_init.py:init():895] sending inform_init request
11
+ 2026-04-21 20:28:39,538 INFO MainThread:13721 [wandb_init.py:init():903] backend started and connected
12
+ 2026-04-21 20:28:39,541 INFO MainThread:13721 [wandb_init.py:init():973] updated telemetry
13
+ 2026-04-21 20:28:39,573 INFO MainThread:13721 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout
14
+ 2026-04-21 20:28:39,865 INFO MainThread:13721 [wandb_init.py:init():1044] starting run threads in backend
15
+ 2026-04-21 20:28:40,023 INFO MainThread:13721 [wandb_run.py:_console_start():2529] atexit reg
16
+ 2026-04-21 20:28:40,024 INFO MainThread:13721 [wandb_run.py:_redirect():2377] redirect: wrap_raw
17
+ 2026-04-21 20:28:40,024 INFO MainThread:13721 [wandb_run.py:_redirect():2446] Wrapping output streams.
18
+ 2026-04-21 20:28:40,024 INFO MainThread:13721 [wandb_run.py:_redirect():2469] Redirects installed.
19
+ 2026-04-21 20:28:40,028 INFO MainThread:13721 [wandb_init.py:init():1084] run started, returning control to user process
20
+ 2026-04-22 03:07:36,952 INFO MainThread:13721 [wandb_run.py:_finish():2295] finishing run nikita/code-completion-full-docstring/8ing6xdi
21
+ 2026-04-22 03:07:36,952 INFO MainThread:13721 [wandb_run.py:_atexit_cleanup():2494] got exitcode: 0
22
+ 2026-04-22 03:07:36,953 INFO MainThread:13721 [wandb_run.py:_restore():2476] restore
23
+ 2026-04-22 03:07:36,953 INFO MainThread:13721 [wandb_run.py:_restore():2482] restore done
24
+ 2026-04-22 03:07:37,536 INFO MainThread:13721 [wandb_run.py:_footer_sync_info():3870] logging synced files
pythia1b_v5_04_21/wandb/run-20260421_202839-8ing6xdi/files/config.yaml ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.24.0
4
+ code_path: code/code_completion_exp/train_pythia/train.py
5
+ e:
6
+ 5t2ii4n1vl6acag1rgxxlnfw2gymqoqp:
7
+ codePath: code_completion_exp/train_pythia/train.py
8
+ codePathLocal: train.py
9
+ cpu_count: 28
10
+ cpu_count_logical: 56
11
+ cudaVersion: "13.1"
12
+ disk:
13
+ /:
14
+ total: "265567551488"
15
+ used: "47762849792"
16
+ email: nikita@local.ru
17
+ executable: /home/test/miniconda3/envs/bytellm/bin/python
18
+ git:
19
+ commit: 01bfef6a1f209a8e7ab13b76c9be95616e3c74c4
20
+ remote: https://github.com/naryst/byte-llms-code.git
21
+ gpu: NVIDIA A100-SXM4-80GB
22
+ gpu_count: 2
23
+ gpu_nvidia:
24
+ - architecture: Ampere
25
+ cudaCores: 6912
26
+ memoryTotal: "85899345920"
27
+ name: NVIDIA A100-SXM4-80GB
28
+ uuid: GPU-b1db9754-65e5-affa-70f1-cf3cd600252a
29
+ - architecture: Ampere
30
+ cudaCores: 6912
31
+ memoryTotal: "85899345920"
32
+ name: NVIDIA A100-SXM4-80GB
33
+ uuid: GPU-cf24f6d2-04cd-c64e-ebba-294de221c27e
34
+ host: compute-vm-56-238-256-ssd-1776796584508
35
+ memory:
36
+ total: "251218849792"
37
+ os: Linux-6.8.0-110-generic-x86_64-with-glibc2.39
38
+ program: /home/test/byte-llms-code/code_completion_exp/train_pythia/train.py
39
+ python: CPython 3.12.0
40
+ root: outputs/2026-04-21/20-28-37
41
+ startedAt: "2026-04-21T20:28:39.219939Z"
42
+ writerId: 5t2ii4n1vl6acag1rgxxlnfw2gymqoqp
43
+ m: []
44
+ python_version: 3.12.0
45
+ t:
46
+ "1":
47
+ - 1
48
+ - 11
49
+ - 49
50
+ - 50
51
+ - 51
52
+ - 71
53
+ - 105
54
+ "2":
55
+ - 1
56
+ - 11
57
+ - 49
58
+ - 50
59
+ - 51
60
+ - 71
61
+ - 105
62
+ "3":
63
+ - 2
64
+ - 13
65
+ - 16
66
+ - 61
67
+ "4": 3.12.0
68
+ "5": 0.24.0
69
+ "6": 4.57.6
70
+ "12": 0.24.0
71
+ "13": linux-x86_64
72
+ data:
73
+ value:
74
+ max_context_len: 4096
75
+ max_target_len: 256
76
+ num_workers: 4
77
+ path: /home/test/byte-llms-code/code_completion_exp/datasets/data_V5_full
78
+ pin_memory: true
79
+ device:
80
+ value: cuda
81
+ logging:
82
+ value:
83
+ eval_interval: 1000
84
+ log_interval: 10
85
+ save_every_epoch: true
86
+ save_interval: 3000
87
+ model:
88
+ value:
89
+ checkpoint_path: null
90
+ from_scratch: false
91
+ name: EleutherAI/pythia-1b
92
+ paths:
93
+ value:
94
+ output_dir: outputs/2026-04-21/20-28-37
95
+ seed:
96
+ value: 42
97
+ tracking:
98
+ value:
99
+ backend: wandb
100
+ base_url: https://wandb.platun0v.ru
101
+ enabled: true
102
+ entity: null
103
+ local_dir: outputs/2026-04-21/20-28-37
104
+ project: code-completion-full-docstring
105
+ run_name: pythia_train
106
+ training:
107
+ value:
108
+ batch_size: 4
109
+ betas:
110
+ - 0.9
111
+ - 0.95
112
+ decay_ratio: 0.2
113
+ epochs: 3
114
+ eps: 1e-08
115
+ eval_batch_size: 12
116
+ gradient_accumulation_steps: 4
117
+ lr: 2e-05
118
+ lr_scheduler: wsd
119
+ max_grad_norm: 1
120
+ min_lr_ratio: 0.1
121
+ resume: false
122
+ resume_checkpoint: null
123
+ use_amp: true
124
+ warmup_ratio: 0.1
125
+ warmup_steps: 100
126
+ weight_decay: 0.1
pythia1b_v5_04_21/wandb/run-20260421_202839-8ing6xdi/files/output.log ADDED
The diff for this file is too large to render. See raw diff
 
pythia1b_v5_04_21/wandb/run-20260421_202839-8ing6xdi/files/requirements.txt ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ nest-asyncio==1.6.0
2
+ safetensors==0.6.2
3
+ sentry-sdk==2.50.0
4
+ jupyter_server_terminals==0.5.3
5
+ pandocfilters==1.5.1
6
+ typer==0.21.1
7
+ causal-conv1d==1.5.0.post8
8
+ clearml==1.16.4
9
+ jupyter-lsp==2.3.0
10
+ pathlib2==2.3.7.post1
11
+ mlstm_kernels==2.0.2
12
+ fastapi==0.128.0
13
+ antlr4-python3-runtime==4.9.3
14
+ PyJWT==2.8.0
15
+ ffmpy==1.0.0
16
+ transformers==4.57.6
17
+ shellingham==1.5.4
18
+ filelock==3.19.1
19
+ nvidia-nvtx-cu12==12.4.127
20
+ nbclassic==1.3.3
21
+ nbconvert==7.16.6
22
+ traitlets==5.14.3
23
+ colorama==0.4.6
24
+ dacite==1.9.2
25
+ argon2-cffi-bindings==25.1.0
26
+ nvidia-nccl-cu12==2.21.5
27
+ httpx==0.28.1
28
+ xxhash==3.6.0
29
+ wcwidth==0.2.14
30
+ terminado==0.18.1
31
+ librt==0.8.0
32
+ notebook_shim==0.2.4
33
+ nvidia-nvjitlink-cu12==12.4.127
34
+ python-json-logger==4.0.0
35
+ nbformat==5.10.4
36
+ pip==25.2
37
+ propcache==0.4.1
38
+ attrs==25.4.0
39
+ pytz==2025.2
40
+ lxml==6.0.2
41
+ executing==2.2.1
42
+ orjson==3.11.6
43
+ plotly==6.5.2
44
+ MarkupSafe==3.0.3
45
+ nvidia-ml-py==13.590.48
46
+ types-python-dateutil==2.9.0.20251008
47
+ nvidia-cufft-cu12==11.2.1.3
48
+ jupyterlab_server==2.27.3
49
+ anyio==4.11.0
50
+ contourpy==1.3.3
51
+ ipython-genutils==0.2.0
52
+ jsonschema-specifications==2025.9.1
53
+ aiohappyeyeballs==2.6.1
54
+ starlette==0.50.0
55
+ gradio_client==2.0.3
56
+ nvidia-cublas-cu12==12.4.5.8
57
+ seaborn==0.13.2
58
+ fonttools==4.60.1
59
+ einops==0.8.1
60
+ omegaconf==2.3.0
61
+ babel==2.17.0
62
+ python-dateutil==2.9.0.post0
63
+ jupyterlab_widgets==3.0.15
64
+ nvidia-cusparse-cu12==12.3.1.170
65
+ trackio==0.15.0
66
+ fastjsonschema==2.21.2
67
+ packaging==25.0
68
+ matplotlib-inline==0.1.7
69
+ pycparser==2.23
70
+ ipykernel==6.30.1
71
+ nvidia-cuda-cupti-cu12==12.4.127
72
+ webencodings==0.5.1
73
+ jupyterlab==4.4.9
74
+ prompt_toolkit==3.0.52
75
+ jupyter_client==8.6.3
76
+ setuptools==78.1.1
77
+ sacrebleu==2.6.0
78
+ pillow==11.3.0
79
+ gradio==6.5.1
80
+ numpy==2.3.3
81
+ certifi==2025.10.5
82
+ fqdn==1.5.1
83
+ hydra-core==1.3.2
84
+ xlstm==2.0.4
85
+ psutil==7.1.0
86
+ six==1.17.0
87
+ jedi==0.19.2
88
+ reportlab==4.4.9
89
+ click==8.3.1
90
+ typing_extensions==4.15.0
91
+ triton==3.2.0
92
+ rpds-py==0.27.1
93
+ pexpect==4.9.0
94
+ jsonschema==4.25.1
95
+ ipython==9.6.0
96
+ jsonpointer==3.0.0
97
+ gitdb==4.0.12
98
+ websocket-client==1.9.0
99
+ jupyter_server==2.17.0
100
+ nvidia-cudnn-cu12==9.1.0.70
101
+ mdurl==0.1.2
102
+ async-lru==2.0.5
103
+ torch==2.6.0
104
+ tzdata==2025.2
105
+ cffi==2.0.0
106
+ tornado==6.5.2
107
+ aiohttp==3.13.1
108
+ nvidia-cuda-nvrtc-cu12==12.4.127
109
+ annotated-types==0.7.0
110
+ isoduration==20.11.0
111
+ ptyprocess==0.7.0
112
+ debugpy==1.8.17
113
+ rfc3339-validator==0.1.4
114
+ uvicorn==0.40.0
115
+ safehttpx==0.1.7
116
+ pydantic==2.12.5
117
+ mypy==1.19.1
118
+ mypy_extensions==1.1.0
119
+ pydub==0.25.1
120
+ markdown-it-py==4.0.0
121
+ cycler==0.12.1
122
+ tinycss2==1.4.0
123
+ tokenizers==0.22.1
124
+ jupyterlab_pygments==0.3.0
125
+ joypy==0.2.6
126
+ jupyter_contrib_core==0.4.2
127
+ nbclient==0.10.2
128
+ furl==2.1.4
129
+ stack-data==0.6.3
130
+ semantic-version==2.10.0
131
+ requests==2.32.5
132
+ nvidia-cufile-cu12==1.13.1.3
133
+ ipython_pygments_lexers==1.1.1
134
+ hf-xet==1.1.10
135
+ arrow==1.3.0
136
+ rfc3987-syntax==1.1.0
137
+ tomlkit==0.13.3
138
+ python-multipart==0.0.22
139
+ jupyter==1.1.1
140
+ idna==3.10
141
+ optree==0.17.0
142
+ h11==0.16.0
143
+ nvidia-curand-cu12==10.3.5.147
144
+ Pygments==2.19.2
145
+ flash_attn==2.7.4.post1
146
+ pandas==2.3.3
147
+ mamba-ssm==2.2.4
148
+ GitPython==3.1.46
149
+ ftfy==6.3.1
150
+ argon2-cffi==25.1.0
151
+ cryptography==46.0.4
152
+ json5==0.12.1
153
+ tqdm==4.67.1
154
+ annotated-doc==0.0.4
155
+ aiosignal==1.4.0
156
+ orderedmultidict==1.0.2
157
+ jupyter-events==0.12.0
158
+ pydantic_core==2.41.5
159
+ jupyter_nbextensions_configurator==0.6.4
160
+ jupyter_core==5.8.1
161
+ multiprocess==0.70.16
162
+ scipy==1.17.0
163
+ frozenlist==1.8.0
164
+ Jinja2==3.1.6
165
+ protobuf==6.33.4
166
+ wandb==0.24.0
167
+ ipywidgets==8.1.7
168
+ webcolors==24.11.1
169
+ nvidia-cuda-runtime-cu12==12.4.127
170
+ comm==0.2.3
171
+ rfc3986-validator==0.1.1
172
+ python-dotenv==1.2.1
173
+ pyzmq==27.1.0
174
+ sniffio==1.3.1
175
+ widgetsnbextension==4.0.14
176
+ kiwisolver==1.4.9
177
+ PyYAML==6.0.3
178
+ platformdirs==4.5.0
179
+ wheel==0.45.1
180
+ httpcore==1.0.9
181
+ datasets==4.3.0
182
+ prometheus_client==0.23.1
183
+ ninja==1.13.0
184
+ rich==14.2.0
185
+ Send2Trash==1.8.3
186
+ multidict==6.7.0
187
+ accelerate==1.10.1
188
+ bleach==6.2.0
189
+ defusedxml==0.7.1
190
+ huggingface-hub==0.35.3
191
+ nvidia-cusparselt-cu12==0.6.2
192
+ beautifulsoup4==4.14.2
193
+ urllib3==2.5.0
194
+ matplotlib==3.10.7
195
+ nvidia-cusolver-cu12==11.6.1.9
196
+ portalocker==3.2.0
197
+ sympy==1.13.1
198
+ brotli==1.2.0
199
+ jupyter-console==6.6.3
200
+ pathspec==1.0.4
201
+ narwhals==2.15.0
202
+ uri-template==1.3.0
203
+ parso==0.8.5
204
+ fsspec==2025.9.0
205
+ typing-inspection==0.4.2
206
+ asttokens==3.0.0
207
+ mistune==3.1.4
208
+ pyarrow==22.0.0
209
+ Authlib==1.6.6
210
+ yarl==1.22.0
211
+ charset-normalizer==3.4.3
212
+ lark==1.3.0
213
+ tabulate==0.9.0
214
+ regex==2025.9.18
215
+ hnet==0.0.1
216
+ dill==0.4.0
217
+ referencing==0.36.2
218
+ networkx==3.5
219
+ notebook==7.4.7
220
+ soupsieve==2.8
221
+ itsdangerous==2.2.0
222
+ opt_einsum==3.4.0
223
+ aiofiles==24.1.0
224
+ decorator==5.2.1
225
+ pyparsing==3.2.5
226
+ pure_eval==0.2.3
227
+ groovy==0.1.2
228
+ mpmath==1.3.0
229
+ smmap==5.0.2
230
+ inflect==7.3.1
231
+ platformdirs==4.2.2
232
+ jaraco.collections==5.1.0
233
+ importlib_metadata==8.0.0
234
+ tomli==2.0.1
235
+ backports.tarfile==1.2.0
236
+ jaraco.text==3.12.1
237
+ typeguard==4.3.0
238
+ autocommand==2.2.2
239
+ wheel==0.45.1
240
+ jaraco.context==5.3.0
241
+ packaging==24.2
242
+ more-itertools==10.3.0
243
+ typing_extensions==4.12.2
244
+ zipp==3.19.2
245
+ jaraco.functools==4.0.1
pythia1b_v5_04_21/wandb/run-20260421_202839-8ing6xdi/files/wandb-metadata.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.8.0-110-generic-x86_64-with-glibc2.39",
3
+ "python": "CPython 3.12.0",
4
+ "startedAt": "2026-04-21T20:28:39.219939Z",
5
+ "program": "/home/test/byte-llms-code/code_completion_exp/train_pythia/train.py",
6
+ "codePath": "code_completion_exp/train_pythia/train.py",
7
+ "codePathLocal": "train.py",
8
+ "git": {
9
+ "remote": "https://github.com/naryst/byte-llms-code.git",
10
+ "commit": "01bfef6a1f209a8e7ab13b76c9be95616e3c74c4"
11
+ },
12
+ "email": "nikita@local.ru",
13
+ "root": "outputs/2026-04-21/20-28-37",
14
+ "host": "compute-vm-56-238-256-ssd-1776796584508",
15
+ "executable": "/home/test/miniconda3/envs/bytellm/bin/python",
16
+ "cpu_count": 28,
17
+ "cpu_count_logical": 56,
18
+ "gpu": "NVIDIA A100-SXM4-80GB",
19
+ "gpu_count": 2,
20
+ "disk": {
21
+ "/": {
22
+ "total": "265567551488",
23
+ "used": "47762849792"
24
+ }
25
+ },
26
+ "memory": {
27
+ "total": "251218849792"
28
+ },
29
+ "gpu_nvidia": [
30
+ {
31
+ "name": "NVIDIA A100-SXM4-80GB",
32
+ "memoryTotal": "85899345920",
33
+ "cudaCores": 6912,
34
+ "architecture": "Ampere",
35
+ "uuid": "GPU-b1db9754-65e5-affa-70f1-cf3cd600252a"
36
+ },
37
+ {
38
+ "name": "NVIDIA A100-SXM4-80GB",
39
+ "memoryTotal": "85899345920",
40
+ "cudaCores": 6912,
41
+ "architecture": "Ampere",
42
+ "uuid": "GPU-cf24f6d2-04cd-c64e-ebba-294de221c27e"
43
+ }
44
+ ],
45
+ "cudaVersion": "13.1",
46
+ "writerId": "5t2ii4n1vl6acag1rgxxlnfw2gymqoqp"
47
+ }
pythia1b_v5_04_21/wandb/run-20260421_202839-8ing6xdi/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"epoch/loss":0.8193523640954421,"val/perplexity":2.954253480862506,"train/loss_avg":0.8192876343746018,"train/step_time":0.5378646373748779,"best/val_loss":1.0519802477912075,"train/lr":2.0000000000000003e-06,"_timestamp":1.776827170070497e+09,"_step":31770,"_wandb":{"runtime":23937},"train/loss":0.9364707842469215,"val/time":184.67540502548218,"train/epoch":3,"best/val_perplexity":2.912696143858297,"best/step":21000,"epoch/time":7722.56347155571,"_runtime":23937,"val/loss":1.0659428229359358}
pythia1b_v5_04_21/wandb/run-20260421_202839-8ing6xdi/logs/debug-core.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-04-21T20:28:39.323073805Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp0kqo2ro3/port-13721.txt","pid":13721,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2026-04-21T20:28:39.323771093Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":13721}
3
+ {"time":"2026-04-21T20:28:39.323710273Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-13721-13772-993102315/socket","Net":"unix"}}
4
+ {"time":"2026-04-21T20:28:39.506674337Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2026-04-21T20:28:39.540165162Z","level":"INFO","msg":"handleInformInit: received","streamId":"8ing6xdi","id":"1(@)"}
6
+ {"time":"2026-04-21T20:28:39.779483135Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"8ing6xdi","id":"1(@)"}
7
+ {"time":"2026-04-22T03:07:37.536676435Z","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"8ing6xdi","id":"1(@)"}
8
+ {"time":"2026-04-22T03:07:40.251497019Z","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"8ing6xdi","id":"1(@)"}
9
+ {"time":"2026-04-22T03:07:40.251582799Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
10
+ {"time":"2026-04-22T03:07:40.251608679Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
11
+ {"time":"2026-04-22T03:07:40.251627789Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
12
+ {"time":"2026-04-22T03:07:40.251650818Z","level":"INFO","msg":"server is shutting down"}
13
+ {"time":"2026-04-22T03:07:40.251686398Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
14
+ {"time":"2026-04-22T03:07:40.251697698Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
15
+ {"time":"2026-04-22T03:07:40.251873857Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-13721-13772-993102315/socket","Net":"unix"}}
16
+ {"time":"2026-04-22T03:07:40.251925627Z","level":"INFO","msg":"server is closed"}
pythia1b_v5_04_21/wandb/run-20260421_202839-8ing6xdi/logs/debug-internal.log ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-04-21T20:28:39.540287752Z","level":"INFO","msg":"stream: starting","core version":"0.24.0"}
2
+ {"time":"2026-04-21T20:28:39.779234937Z","level":"INFO","msg":"stream: created new stream","id":"8ing6xdi"}
3
+ {"time":"2026-04-21T20:28:39.779334686Z","level":"INFO","msg":"handler: started","stream_id":"8ing6xdi"}
4
+ {"time":"2026-04-21T20:28:39.779468186Z","level":"INFO","msg":"stream: started","id":"8ing6xdi"}
5
+ {"time":"2026-04-21T20:28:39.779542005Z","level":"INFO","msg":"sender: started","stream_id":"8ing6xdi"}
6
+ {"time":"2026-04-21T20:28:39.779545215Z","level":"INFO","msg":"writer: started","stream_id":"8ing6xdi"}
7
+ {"time":"2026-04-21T20:28:39.866766122Z","level":"ERROR","msg":"git repo not found","error":"repository does not exist"}
8
+ {"time":"2026-04-22T03:07:37.46900739Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
+ {"time":"2026-04-22T03:07:37.532953738Z","level":"INFO","msg":"handler: operation stats","stats":{}}
10
+ {"time":"2026-04-22T03:07:37.536692894Z","level":"INFO","msg":"stream: closing","id":"8ing6xdi"}
11
+ {"time":"2026-04-22T03:07:37.536702644Z","level":"INFO","msg":"handler: closed","stream_id":"8ing6xdi"}
12
+ {"time":"2026-04-22T03:07:37.536758784Z","level":"INFO","msg":"sender: closed","stream_id":"8ing6xdi"}
13
+ {"time":"2026-04-22T03:07:37.536765384Z","level":"INFO","msg":"stream: closed","id":"8ing6xdi"}
pythia1b_v5_04_21/wandb/run-20260421_202839-8ing6xdi/logs/debug.log ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-04-21 20:28:39,221 INFO MainThread:13721 [wandb_setup.py:_flush():81] Current SDK version is 0.24.0
2
+ 2026-04-21 20:28:39,221 INFO MainThread:13721 [wandb_setup.py:_flush():81] Configure stats pid to 13721
3
+ 2026-04-21 20:28:39,221 INFO MainThread:13721 [wandb_setup.py:_flush():81] Loading settings from environment variables
4
+ 2026-04-21 20:28:39,222 INFO MainThread:13721 [wandb_init.py:setup_run_log_directory():717] Logging user logs to outputs/2026-04-21/20-28-37/wandb/run-20260421_202839-8ing6xdi/logs/debug.log
5
+ 2026-04-21 20:28:39,222 INFO MainThread:13721 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to outputs/2026-04-21/20-28-37/wandb/run-20260421_202839-8ing6xdi/logs/debug-internal.log
6
+ 2026-04-21 20:28:39,222 INFO MainThread:13721 [wandb_init.py:init():844] calling init triggers
7
+ 2026-04-21 20:28:39,222 INFO MainThread:13721 [wandb_init.py:init():849] wandb.init called with sweep_config: {}
8
+ config: {'model': {'name': 'EleutherAI/pythia-1b', 'checkpoint_path': None, 'from_scratch': False}, 'training': {'epochs': 3, 'batch_size': 4, 'eval_batch_size': 12, 'gradient_accumulation_steps': 4, 'lr': 2e-05, 'weight_decay': 0.1, 'betas': [0.9, 0.95], 'eps': 1e-08, 'lr_scheduler': 'wsd', 'warmup_ratio': 0.1, 'decay_ratio': 0.2, 'warmup_steps': 100, 'min_lr_ratio': 0.1, 'max_grad_norm': 1.0, 'use_amp': True, 'resume': False, 'resume_checkpoint': None}, 'data': {'path': '/home/test/byte-llms-code/code_completion_exp/datasets/data_V5_full', 'max_context_len': 4096, 'max_target_len': 256, 'num_workers': 4, 'pin_memory': True}, 'logging': {'log_interval': 10, 'save_interval': 3000, 'eval_interval': 1000, 'save_every_epoch': True}, 'tracking': {'enabled': True, 'backend': 'wandb', 'project': 'code-completion-full-docstring', 'run_name': 'pythia_train', 'entity': None, 'base_url': 'https://wandb.platun0v.ru', 'local_dir': 'outputs/2026-04-21/20-28-37'}, 'paths': {'output_dir': 'outputs/2026-04-21/20-28-37'}, 'seed': 42, 'device': 'cuda', '_wandb': {'code_path': 'code/code_completion_exp/train_pythia/train.py'}}
9
+ 2026-04-21 20:28:39,222 INFO MainThread:13721 [wandb_init.py:init():892] starting backend
10
+ 2026-04-21 20:28:39,507 INFO MainThread:13721 [wandb_init.py:init():895] sending inform_init request
11
+ 2026-04-21 20:28:39,538 INFO MainThread:13721 [wandb_init.py:init():903] backend started and connected
12
+ 2026-04-21 20:28:39,541 INFO MainThread:13721 [wandb_init.py:init():973] updated telemetry
13
+ 2026-04-21 20:28:39,573 INFO MainThread:13721 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout
14
+ 2026-04-21 20:28:39,865 INFO MainThread:13721 [wandb_init.py:init():1044] starting run threads in backend
15
+ 2026-04-21 20:28:40,023 INFO MainThread:13721 [wandb_run.py:_console_start():2529] atexit reg
16
+ 2026-04-21 20:28:40,024 INFO MainThread:13721 [wandb_run.py:_redirect():2377] redirect: wrap_raw
17
+ 2026-04-21 20:28:40,024 INFO MainThread:13721 [wandb_run.py:_redirect():2446] Wrapping output streams.
18
+ 2026-04-21 20:28:40,024 INFO MainThread:13721 [wandb_run.py:_redirect():2469] Redirects installed.
19
+ 2026-04-21 20:28:40,028 INFO MainThread:13721 [wandb_init.py:init():1084] run started, returning control to user process
20
+ 2026-04-22 03:07:36,952 INFO MainThread:13721 [wandb_run.py:_finish():2295] finishing run nikita/code-completion-full-docstring/8ing6xdi
21
+ 2026-04-22 03:07:36,952 INFO MainThread:13721 [wandb_run.py:_atexit_cleanup():2494] got exitcode: 0
22
+ 2026-04-22 03:07:36,953 INFO MainThread:13721 [wandb_run.py:_restore():2476] restore
23
+ 2026-04-22 03:07:36,953 INFO MainThread:13721 [wandb_run.py:_restore():2482] restore done
24
+ 2026-04-22 03:07:37,536 INFO MainThread:13721 [wandb_run.py:_footer_sync_info():3870] logging synced files