Training in progress, step 24750
Browse files- logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0.2__activations_loss_fn_(fn_cosine_distance_loss())__attentions_weight_0__attention/events.out.tfevents.1723582735.93d6cbb3ad53 +3 -0
- logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0.2__activations_loss_fn_(fn_cross_entropy())__attentions_weight_0__attentions_loss_/events.out.tfevents.1723582203.93d6cbb3ad53 +3 -0
- logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0.2__activations_loss_fn_(fn_mse_loss())__attentions_weight_0__attentions_loss_fn_(f/events.out.tfevents.1723582416.93d6cbb3ad53 +3 -0
- logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0.2__activations_loss_fn_(fn_mutual_information_loss())__attentions_weight_0__attent/events.out.tfevents.1723582629.93d6cbb3ad53 +3 -0
- logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0.2__activations_loss_fn_(fn_reverse_kl_divergence_loss())__attentions_weight_0__att/events.out.tfevents.1723582522.93d6cbb3ad53 +3 -0
- logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0.2__activations_loss_fn_(fn_soft_mse_loss())__attentions_weight_0__attentions_loss_/events.out.tfevents.1723582310.93d6cbb3ad53 +3 -0
- logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0__activations_loss_fn_(fn_soft_mse_loss())__attentions_weight_0__attentions_loss_fn/events.out.tfevents.1723582039.93d6cbb3ad53 +2 -2
- logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__hs_weight_0.2__hs_loss_fn_(fn_cross_entropy())__attn_weight_0__attn_loss_fn_(fn_soft_mse_loss()))/events.out.tfevents.1723583180.93d6cbb3ad53 +3 -0
- model.safetensors +1 -1
- training_args.bin +1 -1
logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0.2__activations_loss_fn_(fn_cosine_distance_loss())__attentions_weight_0__attention/events.out.tfevents.1723582735.93d6cbb3ad53
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ba9fdeda4e772865c178685edc77be520f0a6dbc8ea2d77e29e80db917c610dc
|
| 3 |
+
size 6149
|
logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0.2__activations_loss_fn_(fn_cross_entropy())__attentions_weight_0__attentions_loss_/events.out.tfevents.1723582203.93d6cbb3ad53
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:15ba5764f91c0596b10749bca2e5a75188cbda241f9b94d68c448175721586d4
|
| 3 |
+
size 6149
|
logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0.2__activations_loss_fn_(fn_mse_loss())__attentions_weight_0__attentions_loss_fn_(f/events.out.tfevents.1723582416.93d6cbb3ad53
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b4230504d8a0c581d826ffff37fa28aecd9bbbd8bcbf1d8443d0546aceb5c61
|
| 3 |
+
size 6149
|
logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0.2__activations_loss_fn_(fn_mutual_information_loss())__attentions_weight_0__attent/events.out.tfevents.1723582629.93d6cbb3ad53
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb4c7d7bc388f183e715262de28fd2292825ffb6240ab7aad700eb429a9499ec
|
| 3 |
+
size 6149
|
logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0.2__activations_loss_fn_(fn_reverse_kl_divergence_loss())__attentions_weight_0__att/events.out.tfevents.1723582522.93d6cbb3ad53
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:597d4d694b59829ff455813d6013ebbff51831e62c19f9db5fe87841b49acddd
|
| 3 |
+
size 6149
|
logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0.2__activations_loss_fn_(fn_soft_mse_loss())__attentions_weight_0__attentions_loss_/events.out.tfevents.1723582310.93d6cbb3ad53
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6cf62014d212966f2eedb194ecf432b51cc2710d87acd83ffca976a6549e39a4
|
| 3 |
+
size 6149
|
logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__activations_weight_0__activations_loss_fn_(fn_soft_mse_loss())__attentions_weight_0__attentions_loss_fn/events.out.tfevents.1723582039.93d6cbb3ad53
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:83904741a07317315e1899c4af70a28699979582ad6255f4dd296338ea2d6312
|
| 3 |
+
size 529
|
logs/distillation_objective=MultiObjective(logits_weight_1__logits_loss_fn_(fn_kl_divergence_loss())__hs_weight_0.2__hs_loss_fn_(fn_cross_entropy())__attn_weight_0__attn_loss_fn_(fn_soft_mse_loss()))/events.out.tfevents.1723583180.93d6cbb3ad53
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e1d6e7a59cae204ed1614235b2190a556aee3dfd6fcb5908e2b788166beb9af
|
| 3 |
+
size 6729491
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 248894656
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6fbca352ddfbff6dcf1dc3331aec242444664e53c80bb40bc960208191158d6c
|
| 3 |
size 248894656
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 907106756
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37139f44fbbb7f4d764e54596df0a00f47778729fc8d96898077cd95bbf26dad
|
| 3 |
size 907106756
|