azherali commited on
Commit
68afa38
·
verified ·
1 Parent(s): e507eac

Training in progress, step 56000, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -32,8 +32,8 @@
32
  "rank_pattern": {},
33
  "revision": null,
34
  "target_modules": [
35
- "query",
36
- "value"
37
  ],
38
  "target_parameters": null,
39
  "task_type": "SEQ_CLS",
 
32
  "rank_pattern": {},
33
  "revision": null,
34
  "target_modules": [
35
+ "value",
36
+ "query"
37
  ],
38
  "target_parameters": null,
39
  "task_type": "SEQ_CLS",
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e01ff6aadb44db27b60f5a2939c1f91ceef8ce4c8eda33e8448193e02f7dedf
3
  size 3555504
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:722a67a3849c15e216b0363ac3e14756a528f10d48549eada44c7a73a2f1d436
3
  size 3555504
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b312a19a4c3a50bbc6d51dc9137976fee7cefb77fe462694cf14c53d8b7b3ed9
3
  size 7141515
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fe2d82a60d86eb55584c63cb21eb22bb6b83639265e9eef0fddd116db74868c
3
  size 7141515
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01dad4027d20eca2d7fa4b583f03a1d3875b3ab481ed98527232c092bb93df17
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d4c8f50583c3aaf473dbb3c038d9d85411cdae997b9d9d7d896c343f0b11ce3
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a000201d58220548b692d7c263c2ef536a136348b8e258b7e7e4280e42ea9770
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:703b98d29b3227b33c03f1f3acceddb6d9d0304c7caa04dc7709db0049f62b2f
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c18215b26f935a6486c705cff1ccfa7de15b6db51bcfbab399fb0323c2730116
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44ba460a6fb5aaf6e3988a2082759c98869c4746962f11e515d182f4694213b2
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 52000,
3
  "best_metric": 0.9908199660129274,
4
  "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-52000",
5
- "epoch": 1.6640000000000001,
6
  "eval_steps": 4000,
7
- "global_step": 52000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3804,6 +3804,298 @@
3804
  "eval_samples_per_second": 129.199,
3805
  "eval_steps_per_second": 8.075,
3806
  "step": 52000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3807
  }
3808
  ],
3809
  "logging_steps": 100,
@@ -3818,7 +4110,7 @@
3818
  "early_stopping_threshold": 0.0
3819
  },
3820
  "attributes": {
3821
- "early_stopping_patience_counter": 0
3822
  }
3823
  },
3824
  "TrainerControl": {
@@ -3832,7 +4124,7 @@
3832
  "attributes": {}
3833
  }
3834
  },
3835
- "total_flos": 2.208251621920823e+17,
3836
  "train_batch_size": 16,
3837
  "trial_name": null,
3838
  "trial_params": null
 
2
  "best_global_step": 52000,
3
  "best_metric": 0.9908199660129274,
4
  "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-52000",
5
+ "epoch": 1.792,
6
  "eval_steps": 4000,
7
+ "global_step": 56000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3804
  "eval_samples_per_second": 129.199,
3805
  "eval_steps_per_second": 8.075,
3806
  "step": 52000
3807
+ },
3808
+ {
3809
+ "epoch": 1.6672,
3810
+ "grad_norm": 4.400208473205566,
3811
+ "learning_rate": 1.3374125200642056e-05,
3812
+ "loss": 0.0351,
3813
+ "step": 52100
3814
+ },
3815
+ {
3816
+ "epoch": 1.6703999999999999,
3817
+ "grad_norm": 5.015504837036133,
3818
+ "learning_rate": 1.3361284109149279e-05,
3819
+ "loss": 0.0528,
3820
+ "step": 52200
3821
+ },
3822
+ {
3823
+ "epoch": 1.6736,
3824
+ "grad_norm": 0.026230236515402794,
3825
+ "learning_rate": 1.3348443017656504e-05,
3826
+ "loss": 0.0435,
3827
+ "step": 52300
3828
+ },
3829
+ {
3830
+ "epoch": 1.6768,
3831
+ "grad_norm": 8.737045288085938,
3832
+ "learning_rate": 1.3335601926163727e-05,
3833
+ "loss": 0.0466,
3834
+ "step": 52400
3835
+ },
3836
+ {
3837
+ "epoch": 1.6800000000000002,
3838
+ "grad_norm": 0.2274281084537506,
3839
+ "learning_rate": 1.332276083467095e-05,
3840
+ "loss": 0.0561,
3841
+ "step": 52500
3842
+ },
3843
+ {
3844
+ "epoch": 1.6832,
3845
+ "grad_norm": 0.006623820401728153,
3846
+ "learning_rate": 1.330991974317817e-05,
3847
+ "loss": 0.0284,
3848
+ "step": 52600
3849
+ },
3850
+ {
3851
+ "epoch": 1.6864,
3852
+ "grad_norm": 0.0021350777242332697,
3853
+ "learning_rate": 1.3297078651685393e-05,
3854
+ "loss": 0.0511,
3855
+ "step": 52700
3856
+ },
3857
+ {
3858
+ "epoch": 1.6896,
3859
+ "grad_norm": 0.018413754180073738,
3860
+ "learning_rate": 1.3284237560192616e-05,
3861
+ "loss": 0.0543,
3862
+ "step": 52800
3863
+ },
3864
+ {
3865
+ "epoch": 1.6928,
3866
+ "grad_norm": 0.1010914072394371,
3867
+ "learning_rate": 1.3271396468699839e-05,
3868
+ "loss": 0.0453,
3869
+ "step": 52900
3870
+ },
3871
+ {
3872
+ "epoch": 1.696,
3873
+ "grad_norm": 0.004039776977151632,
3874
+ "learning_rate": 1.3258555377207064e-05,
3875
+ "loss": 0.0338,
3876
+ "step": 53000
3877
+ },
3878
+ {
3879
+ "epoch": 1.6992,
3880
+ "grad_norm": 14.739033699035645,
3881
+ "learning_rate": 1.3245714285714287e-05,
3882
+ "loss": 0.0382,
3883
+ "step": 53100
3884
+ },
3885
+ {
3886
+ "epoch": 1.7024,
3887
+ "grad_norm": 0.016198845580220222,
3888
+ "learning_rate": 1.323287319422151e-05,
3889
+ "loss": 0.016,
3890
+ "step": 53200
3891
+ },
3892
+ {
3893
+ "epoch": 1.7056,
3894
+ "grad_norm": 2.079624891281128,
3895
+ "learning_rate": 1.3220032102728733e-05,
3896
+ "loss": 0.0627,
3897
+ "step": 53300
3898
+ },
3899
+ {
3900
+ "epoch": 1.7088,
3901
+ "grad_norm": 0.11790735274553299,
3902
+ "learning_rate": 1.3207191011235956e-05,
3903
+ "loss": 0.0592,
3904
+ "step": 53400
3905
+ },
3906
+ {
3907
+ "epoch": 1.712,
3908
+ "grad_norm": 1.2402265071868896,
3909
+ "learning_rate": 1.319434991974318e-05,
3910
+ "loss": 0.0355,
3911
+ "step": 53500
3912
+ },
3913
+ {
3914
+ "epoch": 1.7151999999999998,
3915
+ "grad_norm": 0.004012857098132372,
3916
+ "learning_rate": 1.3181508828250403e-05,
3917
+ "loss": 0.0233,
3918
+ "step": 53600
3919
+ },
3920
+ {
3921
+ "epoch": 1.7184,
3922
+ "grad_norm": 9.97103500366211,
3923
+ "learning_rate": 1.3168667736757626e-05,
3924
+ "loss": 0.0416,
3925
+ "step": 53700
3926
+ },
3927
+ {
3928
+ "epoch": 1.7216,
3929
+ "grad_norm": 0.00929461419582367,
3930
+ "learning_rate": 1.3155826645264849e-05,
3931
+ "loss": 0.0245,
3932
+ "step": 53800
3933
+ },
3934
+ {
3935
+ "epoch": 1.7248,
3936
+ "grad_norm": 0.014831352047622204,
3937
+ "learning_rate": 1.3142985553772072e-05,
3938
+ "loss": 0.0568,
3939
+ "step": 53900
3940
+ },
3941
+ {
3942
+ "epoch": 1.728,
3943
+ "grad_norm": 0.017993494868278503,
3944
+ "learning_rate": 1.3130144462279295e-05,
3945
+ "loss": 0.0316,
3946
+ "step": 54000
3947
+ },
3948
+ {
3949
+ "epoch": 1.7311999999999999,
3950
+ "grad_norm": 7.752192974090576,
3951
+ "learning_rate": 1.3117303370786518e-05,
3952
+ "loss": 0.0435,
3953
+ "step": 54100
3954
+ },
3955
+ {
3956
+ "epoch": 1.7344,
3957
+ "grad_norm": 0.022409453988075256,
3958
+ "learning_rate": 1.3104462279293741e-05,
3959
+ "loss": 0.0272,
3960
+ "step": 54200
3961
+ },
3962
+ {
3963
+ "epoch": 1.7376,
3964
+ "grad_norm": 0.022196965292096138,
3965
+ "learning_rate": 1.3091621187800965e-05,
3966
+ "loss": 0.0389,
3967
+ "step": 54300
3968
+ },
3969
+ {
3970
+ "epoch": 1.7408000000000001,
3971
+ "grad_norm": 0.010344170965254307,
3972
+ "learning_rate": 1.3078780096308188e-05,
3973
+ "loss": 0.0292,
3974
+ "step": 54400
3975
+ },
3976
+ {
3977
+ "epoch": 1.744,
3978
+ "grad_norm": 0.026820389553904533,
3979
+ "learning_rate": 1.306593900481541e-05,
3980
+ "loss": 0.0318,
3981
+ "step": 54500
3982
+ },
3983
+ {
3984
+ "epoch": 1.7471999999999999,
3985
+ "grad_norm": 3.427290916442871,
3986
+ "learning_rate": 1.3053097913322634e-05,
3987
+ "loss": 0.04,
3988
+ "step": 54600
3989
+ },
3990
+ {
3991
+ "epoch": 1.7504,
3992
+ "grad_norm": 0.0476866252720356,
3993
+ "learning_rate": 1.3040256821829857e-05,
3994
+ "loss": 0.0356,
3995
+ "step": 54700
3996
+ },
3997
+ {
3998
+ "epoch": 1.7536,
3999
+ "grad_norm": 0.16033174097537994,
4000
+ "learning_rate": 1.302741573033708e-05,
4001
+ "loss": 0.0465,
4002
+ "step": 54800
4003
+ },
4004
+ {
4005
+ "epoch": 1.7568000000000001,
4006
+ "grad_norm": 0.00891600176692009,
4007
+ "learning_rate": 1.3014574638844302e-05,
4008
+ "loss": 0.0354,
4009
+ "step": 54900
4010
+ },
4011
+ {
4012
+ "epoch": 1.76,
4013
+ "grad_norm": 0.09468699991703033,
4014
+ "learning_rate": 1.3001733547351525e-05,
4015
+ "loss": 0.049,
4016
+ "step": 55000
4017
+ },
4018
+ {
4019
+ "epoch": 1.7631999999999999,
4020
+ "grad_norm": 0.11951547861099243,
4021
+ "learning_rate": 1.2988892455858748e-05,
4022
+ "loss": 0.0488,
4023
+ "step": 55100
4024
+ },
4025
+ {
4026
+ "epoch": 1.7664,
4027
+ "grad_norm": 0.008669690228998661,
4028
+ "learning_rate": 1.2976051364365971e-05,
4029
+ "loss": 0.0418,
4030
+ "step": 55200
4031
+ },
4032
+ {
4033
+ "epoch": 1.7696,
4034
+ "grad_norm": 5.866086006164551,
4035
+ "learning_rate": 1.2963210272873194e-05,
4036
+ "loss": 0.0452,
4037
+ "step": 55300
4038
+ },
4039
+ {
4040
+ "epoch": 1.7728000000000002,
4041
+ "grad_norm": 0.007491165306419134,
4042
+ "learning_rate": 1.2950369181380417e-05,
4043
+ "loss": 0.0331,
4044
+ "step": 55400
4045
+ },
4046
+ {
4047
+ "epoch": 1.776,
4048
+ "grad_norm": 0.10585305094718933,
4049
+ "learning_rate": 1.293752808988764e-05,
4050
+ "loss": 0.0406,
4051
+ "step": 55500
4052
+ },
4053
+ {
4054
+ "epoch": 1.7792,
4055
+ "grad_norm": 0.3077317178249359,
4056
+ "learning_rate": 1.2924686998394864e-05,
4057
+ "loss": 0.0455,
4058
+ "step": 55600
4059
+ },
4060
+ {
4061
+ "epoch": 1.7824,
4062
+ "grad_norm": 0.008049139752984047,
4063
+ "learning_rate": 1.2911845906902087e-05,
4064
+ "loss": 0.0271,
4065
+ "step": 55700
4066
+ },
4067
+ {
4068
+ "epoch": 1.7856,
4069
+ "grad_norm": 0.005238874349743128,
4070
+ "learning_rate": 1.289900481540931e-05,
4071
+ "loss": 0.0312,
4072
+ "step": 55800
4073
+ },
4074
+ {
4075
+ "epoch": 1.7888,
4076
+ "grad_norm": 0.011361650191247463,
4077
+ "learning_rate": 1.2886163723916533e-05,
4078
+ "loss": 0.0387,
4079
+ "step": 55900
4080
+ },
4081
+ {
4082
+ "epoch": 1.792,
4083
+ "grad_norm": 0.0051416209898889065,
4084
+ "learning_rate": 1.2873322632423756e-05,
4085
+ "loss": 0.0374,
4086
+ "step": 56000
4087
+ },
4088
+ {
4089
+ "epoch": 1.792,
4090
+ "eval_accuracy": 0.99052,
4091
+ "eval_f1": 0.9905207896506175,
4092
+ "eval_loss": 0.03697649762034416,
4093
+ "eval_precision": 0.9905285934060213,
4094
+ "eval_recall": 0.99052,
4095
+ "eval_runtime": 764.6577,
4096
+ "eval_samples_per_second": 130.777,
4097
+ "eval_steps_per_second": 8.174,
4098
+ "step": 56000
4099
  }
4100
  ],
4101
  "logging_steps": 100,
 
4110
  "early_stopping_threshold": 0.0
4111
  },
4112
  "attributes": {
4113
+ "early_stopping_patience_counter": 1
4114
  }
4115
  },
4116
  "TrainerControl": {
 
4124
  "attributes": {}
4125
  }
4126
  },
4127
+ "total_flos": 2.3781344809356787e+17,
4128
  "train_batch_size": 16,
4129
  "trial_name": null,
4130
  "trial_params": null