lapp0 commited on
Commit
b618c80
·
verified ·
1 Parent(s): 26f1024

Training in progress, step 24750

Browse files
Files changed (10) hide show
  1. logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0.2__activations_loss_fn_(fn_cosine_distance_loss())__attentions_weight_0__attention/events.out.tfevents.1723582735.93d6cbb3ad53 +3 -0
  2. logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0.2__activations_loss_fn_(fn_cross_entropy())__attentions_weight_0__attentions_loss_/events.out.tfevents.1723582203.93d6cbb3ad53 +3 -0
  3. logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0.2__activations_loss_fn_(fn_mse_loss())__attentions_weight_0__attentions_loss_fn_(f/events.out.tfevents.1723582416.93d6cbb3ad53 +3 -0
  4. logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0.2__activations_loss_fn_(fn_mutual_information_loss())__attentions_weight_0__attent/events.out.tfevents.1723582629.93d6cbb3ad53 +3 -0
  5. logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0.2__activations_loss_fn_(fn_reverse_kl_divergence_loss())__attentions_weight_0__att/events.out.tfevents.1723582522.93d6cbb3ad53 +3 -0
  6. logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0.2__activations_loss_fn_(fn_soft_mse_loss())__attentions_weight_0__attentions_loss_/events.out.tfevents.1723582310.93d6cbb3ad53 +3 -0
  7. logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0__activations_loss_fn_(fn_soft_mse_loss())__attentions_weight_0__attentions_loss_fn/events.out.tfevents.1723582039.93d6cbb3ad53 +2 -2
  8. logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__hs_weight_0.2__hs_loss_fn_(fn_cross_entropy())__attn_weight_0__attn_loss_fn_(fn_soft_mse_loss()))/events.out.tfevents.1723583180.93d6cbb3ad53 +3 -0
  9. model.safetensors +1 -1
  10. training_args.bin +1 -1
logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0.2__activations_loss_fn_(fn_cosine_distance_loss())__attentions_weight_0__attention/events.out.tfevents.1723582735.93d6cbb3ad53 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba9fdeda4e772865c178685edc77be520f0a6dbc8ea2d77e29e80db917c610dc
3
+ size 6149
logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0.2__activations_loss_fn_(fn_cross_entropy())__attentions_weight_0__attentions_loss_/events.out.tfevents.1723582203.93d6cbb3ad53 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15ba5764f91c0596b10749bca2e5a75188cbda241f9b94d68c448175721586d4
3
+ size 6149
logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0.2__activations_loss_fn_(fn_mse_loss())__attentions_weight_0__attentions_loss_fn_(f/events.out.tfevents.1723582416.93d6cbb3ad53 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b4230504d8a0c581d826ffff37fa28aecd9bbbd8bcbf1d8443d0546aceb5c61
3
+ size 6149
logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0.2__activations_loss_fn_(fn_mutual_information_loss())__attentions_weight_0__attent/events.out.tfevents.1723582629.93d6cbb3ad53 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb4c7d7bc388f183e715262de28fd2292825ffb6240ab7aad700eb429a9499ec
3
+ size 6149
logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0.2__activations_loss_fn_(fn_reverse_kl_divergence_loss())__attentions_weight_0__att/events.out.tfevents.1723582522.93d6cbb3ad53 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:597d4d694b59829ff455813d6013ebbff51831e62c19f9db5fe87841b49acddd
3
+ size 6149
logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0.2__activations_loss_fn_(fn_soft_mse_loss())__attentions_weight_0__attentions_loss_/events.out.tfevents.1723582310.93d6cbb3ad53 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cf62014d212966f2eedb194ecf432b51cc2710d87acd83ffca976a6549e39a4
3
+ size 6149
logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0__activations_loss_fn_(fn_soft_mse_loss())__attentions_weight_0__attentions_loss_fn/events.out.tfevents.1723582039.93d6cbb3ad53 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f32346227dc28a76b5048d7c966ee047106262bea4b507d683351dc0306324e
3
- size 253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83904741a07317315e1899c4af70a28699979582ad6255f4dd296338ea2d6312
3
+ size 529
logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__hs_weight_0.2__hs_loss_fn_(fn_cross_entropy())__attn_weight_0__attn_loss_fn_(fn_soft_mse_loss()))/events.out.tfevents.1723583180.93d6cbb3ad53 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e1d6e7a59cae204ed1614235b2190a556aee3dfd6fcb5908e2b788166beb9af
3
+ size 6729491
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92a4382549a0f3cb072a2a27c850395da123bc1f311af2d205871ff33f505b46
3
  size 248894656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fbca352ddfbff6dcf1dc3331aec242444664e53c80bb40bc960208191158d6c
3
  size 248894656
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9070b76a5f80249082771c15dfc294c6719d84de673e916fa701ab17b3e20f2d
3
  size 907106756
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37139f44fbbb7f4d764e54596df0a00f47778729fc8d96898077cd95bbf26dad
3
  size 907106756