hariniiiiiiiiii
commited on
Commit
•
024a457
1
Parent(s):
7f9042d
Training in progress, step 4500
Browse files- config.json +1 -1
- last-checkpoint/config.json +1 -1
- last-checkpoint/generation_config.json +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +363 -3
- last-checkpoint/training_args.bin +1 -1
- pytorch_model.bin +1 -1
- runs/Feb11_14-00-34_74e5e7b42358/1676124921.3277113/events.out.tfevents.1676124921.74e5e7b42358.292.1 +3 -0
- runs/Feb11_14-00-34_74e5e7b42358/events.out.tfevents.1676124921.74e5e7b42358.292.0 +3 -0
- training_args.bin +1 -1
config.json
CHANGED
@@ -30,7 +30,7 @@
|
|
30 |
"tie_word_embeddings": false,
|
31 |
"tokenizer_class": "T5Tokenizer",
|
32 |
"torch_dtype": "float32",
|
33 |
-
"transformers_version": "4.26.
|
34 |
"use_cache": true,
|
35 |
"vocab_size": 250112
|
36 |
}
|
|
|
30 |
"tie_word_embeddings": false,
|
31 |
"tokenizer_class": "T5Tokenizer",
|
32 |
"torch_dtype": "float32",
|
33 |
+
"transformers_version": "4.26.1",
|
34 |
"use_cache": true,
|
35 |
"vocab_size": 250112
|
36 |
}
|
last-checkpoint/config.json
CHANGED
@@ -30,7 +30,7 @@
|
|
30 |
"tie_word_embeddings": false,
|
31 |
"tokenizer_class": "T5Tokenizer",
|
32 |
"torch_dtype": "float32",
|
33 |
-
"transformers_version": "4.26.
|
34 |
"use_cache": true,
|
35 |
"vocab_size": 250112
|
36 |
}
|
|
|
30 |
"tie_word_embeddings": false,
|
31 |
"tokenizer_class": "T5Tokenizer",
|
32 |
"torch_dtype": "float32",
|
33 |
+
"transformers_version": "4.26.1",
|
34 |
"use_cache": true,
|
35 |
"vocab_size": 250112
|
36 |
}
|
last-checkpoint/generation_config.json
CHANGED
@@ -7,5 +7,5 @@
|
|
7 |
"no_repeat_ngram_size": 2,
|
8 |
"num_beams": 15,
|
9 |
"pad_token_id": 0,
|
10 |
-
"transformers_version": "4.26.
|
11 |
}
|
|
|
7 |
"no_repeat_ngram_size": 2,
|
8 |
"num_beams": 15,
|
9 |
"pad_token_id": 0,
|
10 |
+
"transformers_version": "4.26.1"
|
11 |
}
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4115013
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:979fae33b211827802e243d4b2113809f0f0cecb9b7c1d248e072e2b037b2cb0
|
3 |
size 4115013
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2329702453
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:19f4336db612efb7d62e5471628f98da8a89b2945716dfd11cbdd02c45bd395f
|
3 |
size 2329702453
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4c4dc079a6baea2794599f0f64419cf3494f7a5dbc363ce3a63466ab6608372
|
3 |
size 14575
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eea54d69309341d06e47639645523531f831191f588becdb9503cc4509f35e8f
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -2886,11 +2886,371 @@
|
|
2886 |
"eval_samples_per_second": 0.22,
|
2887 |
"eval_steps_per_second": 0.22,
|
2888 |
"step": 4000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2889 |
}
|
2890 |
],
|
2891 |
"max_steps": 5060,
|
2892 |
"num_train_epochs": 10,
|
2893 |
-
"total_flos":
|
2894 |
"trial_name": null,
|
2895 |
"trial_params": null
|
2896 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 8.891628652447293,
|
5 |
+
"global_step": 4500,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
2886 |
"eval_samples_per_second": 0.22,
|
2887 |
"eval_steps_per_second": 0.22,
|
2888 |
"step": 4000
|
2889 |
+
},
|
2890 |
+
{
|
2891 |
+
"epoch": 7.92,
|
2892 |
+
"learning_rate": 0.00010563380281690141,
|
2893 |
+
"loss": 0.1264,
|
2894 |
+
"step": 4010
|
2895 |
+
},
|
2896 |
+
{
|
2897 |
+
"epoch": 7.94,
|
2898 |
+
"learning_rate": 0.00010462776659959759,
|
2899 |
+
"loss": 0.1026,
|
2900 |
+
"step": 4020
|
2901 |
+
},
|
2902 |
+
{
|
2903 |
+
"epoch": 7.96,
|
2904 |
+
"learning_rate": 0.00010362173038229377,
|
2905 |
+
"loss": 0.1348,
|
2906 |
+
"step": 4030
|
2907 |
+
},
|
2908 |
+
{
|
2909 |
+
"epoch": 7.98,
|
2910 |
+
"learning_rate": 0.00010261569416498995,
|
2911 |
+
"loss": 0.1893,
|
2912 |
+
"step": 4040
|
2913 |
+
},
|
2914 |
+
{
|
2915 |
+
"epoch": 8.0,
|
2916 |
+
"learning_rate": 0.00010160965794768612,
|
2917 |
+
"loss": 0.1048,
|
2918 |
+
"step": 4050
|
2919 |
+
},
|
2920 |
+
{
|
2921 |
+
"epoch": 8.02,
|
2922 |
+
"learning_rate": 0.0001006036217303823,
|
2923 |
+
"loss": 0.1576,
|
2924 |
+
"step": 4060
|
2925 |
+
},
|
2926 |
+
{
|
2927 |
+
"epoch": 8.04,
|
2928 |
+
"learning_rate": 9.959758551307847e-05,
|
2929 |
+
"loss": 0.1084,
|
2930 |
+
"step": 4070
|
2931 |
+
},
|
2932 |
+
{
|
2933 |
+
"epoch": 8.06,
|
2934 |
+
"learning_rate": 9.859154929577464e-05,
|
2935 |
+
"loss": 0.089,
|
2936 |
+
"step": 4080
|
2937 |
+
},
|
2938 |
+
{
|
2939 |
+
"epoch": 8.08,
|
2940 |
+
"learning_rate": 9.758551307847083e-05,
|
2941 |
+
"loss": 0.0989,
|
2942 |
+
"step": 4090
|
2943 |
+
},
|
2944 |
+
{
|
2945 |
+
"epoch": 8.1,
|
2946 |
+
"learning_rate": 9.6579476861167e-05,
|
2947 |
+
"loss": 0.108,
|
2948 |
+
"step": 4100
|
2949 |
+
},
|
2950 |
+
{
|
2951 |
+
"epoch": 8.1,
|
2952 |
+
"eval_loss": 0.9726575016975403,
|
2953 |
+
"eval_rouge1": 0.1383333333333333,
|
2954 |
+
"eval_rouge2": 0.10340909090909092,
|
2955 |
+
"eval_rougeL": 0.14448717948717948,
|
2956 |
+
"eval_rougeLsum": 0.13666666666666666,
|
2957 |
+
"eval_runtime": 89.426,
|
2958 |
+
"eval_samples_per_second": 0.224,
|
2959 |
+
"eval_steps_per_second": 0.224,
|
2960 |
+
"step": 4100
|
2961 |
+
},
|
2962 |
+
{
|
2963 |
+
"epoch": 8.12,
|
2964 |
+
"learning_rate": 9.557344064386318e-05,
|
2965 |
+
"loss": 0.0576,
|
2966 |
+
"step": 4110
|
2967 |
+
},
|
2968 |
+
{
|
2969 |
+
"epoch": 8.14,
|
2970 |
+
"learning_rate": 9.456740442655936e-05,
|
2971 |
+
"loss": 0.0937,
|
2972 |
+
"step": 4120
|
2973 |
+
},
|
2974 |
+
{
|
2975 |
+
"epoch": 8.16,
|
2976 |
+
"learning_rate": 9.356136820925553e-05,
|
2977 |
+
"loss": 0.0814,
|
2978 |
+
"step": 4130
|
2979 |
+
},
|
2980 |
+
{
|
2981 |
+
"epoch": 8.18,
|
2982 |
+
"learning_rate": 9.255533199195171e-05,
|
2983 |
+
"loss": 0.0832,
|
2984 |
+
"step": 4140
|
2985 |
+
},
|
2986 |
+
{
|
2987 |
+
"epoch": 8.2,
|
2988 |
+
"learning_rate": 9.15492957746479e-05,
|
2989 |
+
"loss": 0.0881,
|
2990 |
+
"step": 4150
|
2991 |
+
},
|
2992 |
+
{
|
2993 |
+
"epoch": 8.22,
|
2994 |
+
"learning_rate": 9.054325955734406e-05,
|
2995 |
+
"loss": 0.0785,
|
2996 |
+
"step": 4160
|
2997 |
+
},
|
2998 |
+
{
|
2999 |
+
"epoch": 8.24,
|
3000 |
+
"learning_rate": 8.953722334004025e-05,
|
3001 |
+
"loss": 0.1046,
|
3002 |
+
"step": 4170
|
3003 |
+
},
|
3004 |
+
{
|
3005 |
+
"epoch": 8.26,
|
3006 |
+
"learning_rate": 8.853118712273642e-05,
|
3007 |
+
"loss": 0.1137,
|
3008 |
+
"step": 4180
|
3009 |
+
},
|
3010 |
+
{
|
3011 |
+
"epoch": 8.28,
|
3012 |
+
"learning_rate": 8.75251509054326e-05,
|
3013 |
+
"loss": 0.0966,
|
3014 |
+
"step": 4190
|
3015 |
+
},
|
3016 |
+
{
|
3017 |
+
"epoch": 8.3,
|
3018 |
+
"learning_rate": 8.651911468812877e-05,
|
3019 |
+
"loss": 0.1292,
|
3020 |
+
"step": 4200
|
3021 |
+
},
|
3022 |
+
{
|
3023 |
+
"epoch": 8.3,
|
3024 |
+
"eval_loss": 0.9639500379562378,
|
3025 |
+
"eval_rouge1": 0.21000000000000002,
|
3026 |
+
"eval_rouge2": 0.12563131313131312,
|
3027 |
+
"eval_rougeL": 0.2097902097902098,
|
3028 |
+
"eval_rougeLsum": 0.20979020979020976,
|
3029 |
+
"eval_runtime": 86.8779,
|
3030 |
+
"eval_samples_per_second": 0.23,
|
3031 |
+
"eval_steps_per_second": 0.23,
|
3032 |
+
"step": 4200
|
3033 |
+
},
|
3034 |
+
{
|
3035 |
+
"epoch": 8.32,
|
3036 |
+
"learning_rate": 8.551307847082495e-05,
|
3037 |
+
"loss": 0.099,
|
3038 |
+
"step": 4210
|
3039 |
+
},
|
3040 |
+
{
|
3041 |
+
"epoch": 8.34,
|
3042 |
+
"learning_rate": 8.450704225352113e-05,
|
3043 |
+
"loss": 0.082,
|
3044 |
+
"step": 4220
|
3045 |
+
},
|
3046 |
+
{
|
3047 |
+
"epoch": 8.36,
|
3048 |
+
"learning_rate": 8.350100603621731e-05,
|
3049 |
+
"loss": 0.1007,
|
3050 |
+
"step": 4230
|
3051 |
+
},
|
3052 |
+
{
|
3053 |
+
"epoch": 8.38,
|
3054 |
+
"learning_rate": 8.249496981891348e-05,
|
3055 |
+
"loss": 0.0826,
|
3056 |
+
"step": 4240
|
3057 |
+
},
|
3058 |
+
{
|
3059 |
+
"epoch": 8.4,
|
3060 |
+
"learning_rate": 8.148893360160967e-05,
|
3061 |
+
"loss": 0.0823,
|
3062 |
+
"step": 4250
|
3063 |
+
},
|
3064 |
+
{
|
3065 |
+
"epoch": 8.42,
|
3066 |
+
"learning_rate": 8.048289738430584e-05,
|
3067 |
+
"loss": 0.0863,
|
3068 |
+
"step": 4260
|
3069 |
+
},
|
3070 |
+
{
|
3071 |
+
"epoch": 8.44,
|
3072 |
+
"learning_rate": 7.9476861167002e-05,
|
3073 |
+
"loss": 0.1037,
|
3074 |
+
"step": 4270
|
3075 |
+
},
|
3076 |
+
{
|
3077 |
+
"epoch": 8.46,
|
3078 |
+
"learning_rate": 7.847082494969819e-05,
|
3079 |
+
"loss": 0.097,
|
3080 |
+
"step": 4280
|
3081 |
+
},
|
3082 |
+
{
|
3083 |
+
"epoch": 8.48,
|
3084 |
+
"learning_rate": 7.746478873239437e-05,
|
3085 |
+
"loss": 0.0589,
|
3086 |
+
"step": 4290
|
3087 |
+
},
|
3088 |
+
{
|
3089 |
+
"epoch": 8.5,
|
3090 |
+
"learning_rate": 7.645875251509054e-05,
|
3091 |
+
"loss": 0.0868,
|
3092 |
+
"step": 4300
|
3093 |
+
},
|
3094 |
+
{
|
3095 |
+
"epoch": 8.5,
|
3096 |
+
"eval_loss": 0.9618169069290161,
|
3097 |
+
"eval_rouge1": 0.15,
|
3098 |
+
"eval_rouge2": 0.09431818181818181,
|
3099 |
+
"eval_rougeL": 0.15076923076923077,
|
3100 |
+
"eval_rougeLsum": 0.1465384615384615,
|
3101 |
+
"eval_runtime": 86.2134,
|
3102 |
+
"eval_samples_per_second": 0.232,
|
3103 |
+
"eval_steps_per_second": 0.232,
|
3104 |
+
"step": 4300
|
3105 |
+
},
|
3106 |
+
{
|
3107 |
+
"epoch": 8.52,
|
3108 |
+
"learning_rate": 7.545271629778672e-05,
|
3109 |
+
"loss": 0.0964,
|
3110 |
+
"step": 4310
|
3111 |
+
},
|
3112 |
+
{
|
3113 |
+
"epoch": 8.54,
|
3114 |
+
"learning_rate": 7.444668008048291e-05,
|
3115 |
+
"loss": 0.1144,
|
3116 |
+
"step": 4320
|
3117 |
+
},
|
3118 |
+
{
|
3119 |
+
"epoch": 8.56,
|
3120 |
+
"learning_rate": 7.344064386317907e-05,
|
3121 |
+
"loss": 0.1029,
|
3122 |
+
"step": 4330
|
3123 |
+
},
|
3124 |
+
{
|
3125 |
+
"epoch": 8.58,
|
3126 |
+
"learning_rate": 7.243460764587526e-05,
|
3127 |
+
"loss": 0.0978,
|
3128 |
+
"step": 4340
|
3129 |
+
},
|
3130 |
+
{
|
3131 |
+
"epoch": 8.6,
|
3132 |
+
"learning_rate": 7.142857142857142e-05,
|
3133 |
+
"loss": 0.142,
|
3134 |
+
"step": 4350
|
3135 |
+
},
|
3136 |
+
{
|
3137 |
+
"epoch": 8.62,
|
3138 |
+
"learning_rate": 7.042253521126761e-05,
|
3139 |
+
"loss": 0.0957,
|
3140 |
+
"step": 4360
|
3141 |
+
},
|
3142 |
+
{
|
3143 |
+
"epoch": 8.64,
|
3144 |
+
"learning_rate": 6.941649899396378e-05,
|
3145 |
+
"loss": 0.0896,
|
3146 |
+
"step": 4370
|
3147 |
+
},
|
3148 |
+
{
|
3149 |
+
"epoch": 8.65,
|
3150 |
+
"learning_rate": 6.841046277665996e-05,
|
3151 |
+
"loss": 0.0998,
|
3152 |
+
"step": 4380
|
3153 |
+
},
|
3154 |
+
{
|
3155 |
+
"epoch": 8.67,
|
3156 |
+
"learning_rate": 6.740442655935614e-05,
|
3157 |
+
"loss": 0.0828,
|
3158 |
+
"step": 4390
|
3159 |
+
},
|
3160 |
+
{
|
3161 |
+
"epoch": 8.69,
|
3162 |
+
"learning_rate": 6.639839034205232e-05,
|
3163 |
+
"loss": 0.1023,
|
3164 |
+
"step": 4400
|
3165 |
+
},
|
3166 |
+
{
|
3167 |
+
"epoch": 8.69,
|
3168 |
+
"eval_loss": 0.9609012603759766,
|
3169 |
+
"eval_rouge1": 0.18,
|
3170 |
+
"eval_rouge2": 0.075,
|
3171 |
+
"eval_rougeL": 0.18,
|
3172 |
+
"eval_rougeLsum": 0.18,
|
3173 |
+
"eval_runtime": 85.733,
|
3174 |
+
"eval_samples_per_second": 0.233,
|
3175 |
+
"eval_steps_per_second": 0.233,
|
3176 |
+
"step": 4400
|
3177 |
+
},
|
3178 |
+
{
|
3179 |
+
"epoch": 8.71,
|
3180 |
+
"learning_rate": 6.539235412474849e-05,
|
3181 |
+
"loss": 0.1324,
|
3182 |
+
"step": 4410
|
3183 |
+
},
|
3184 |
+
{
|
3185 |
+
"epoch": 8.73,
|
3186 |
+
"learning_rate": 6.438631790744468e-05,
|
3187 |
+
"loss": 0.1107,
|
3188 |
+
"step": 4420
|
3189 |
+
},
|
3190 |
+
{
|
3191 |
+
"epoch": 8.75,
|
3192 |
+
"learning_rate": 6.338028169014085e-05,
|
3193 |
+
"loss": 0.0756,
|
3194 |
+
"step": 4430
|
3195 |
+
},
|
3196 |
+
{
|
3197 |
+
"epoch": 8.77,
|
3198 |
+
"learning_rate": 6.237424547283703e-05,
|
3199 |
+
"loss": 0.1019,
|
3200 |
+
"step": 4440
|
3201 |
+
},
|
3202 |
+
{
|
3203 |
+
"epoch": 8.79,
|
3204 |
+
"learning_rate": 6.13682092555332e-05,
|
3205 |
+
"loss": 0.1232,
|
3206 |
+
"step": 4450
|
3207 |
+
},
|
3208 |
+
{
|
3209 |
+
"epoch": 8.81,
|
3210 |
+
"learning_rate": 6.036217303822938e-05,
|
3211 |
+
"loss": 0.1186,
|
3212 |
+
"step": 4460
|
3213 |
+
},
|
3214 |
+
{
|
3215 |
+
"epoch": 8.83,
|
3216 |
+
"learning_rate": 5.935613682092555e-05,
|
3217 |
+
"loss": 0.1093,
|
3218 |
+
"step": 4470
|
3219 |
+
},
|
3220 |
+
{
|
3221 |
+
"epoch": 8.85,
|
3222 |
+
"learning_rate": 5.8350100603621735e-05,
|
3223 |
+
"loss": 0.1009,
|
3224 |
+
"step": 4480
|
3225 |
+
},
|
3226 |
+
{
|
3227 |
+
"epoch": 8.87,
|
3228 |
+
"learning_rate": 5.734406438631791e-05,
|
3229 |
+
"loss": 0.0878,
|
3230 |
+
"step": 4490
|
3231 |
+
},
|
3232 |
+
{
|
3233 |
+
"epoch": 8.89,
|
3234 |
+
"learning_rate": 5.6338028169014086e-05,
|
3235 |
+
"loss": 0.1102,
|
3236 |
+
"step": 4500
|
3237 |
+
},
|
3238 |
+
{
|
3239 |
+
"epoch": 8.89,
|
3240 |
+
"eval_loss": 0.9644363522529602,
|
3241 |
+
"eval_rouge1": 0.14615384615384613,
|
3242 |
+
"eval_rouge2": 0.1,
|
3243 |
+
"eval_rougeL": 0.15115384615384614,
|
3244 |
+
"eval_rougeLsum": 0.145,
|
3245 |
+
"eval_runtime": 83.9759,
|
3246 |
+
"eval_samples_per_second": 0.238,
|
3247 |
+
"eval_steps_per_second": 0.238,
|
3248 |
+
"step": 4500
|
3249 |
}
|
3250 |
],
|
3251 |
"max_steps": 5060,
|
3252 |
"num_train_epochs": 10,
|
3253 |
+
"total_flos": 1.0952350126776115e+17,
|
3254 |
"trial_name": null,
|
3255 |
"trial_params": null
|
3256 |
}
|
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3643
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77a4f7038b50ddc31edcc81a5b698eebafae24d0db54d6acae4be1a9f931706a
|
3 |
size 3643
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2329702453
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:19f4336db612efb7d62e5471628f98da8a89b2945716dfd11cbdd02c45bd395f
|
3 |
size 2329702453
|
runs/Feb11_14-00-34_74e5e7b42358/1676124921.3277113/events.out.tfevents.1676124921.74e5e7b42358.292.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd4d2a0362ea25dd375e16ee87a5e18c502572377dceecc7cd828dda97eea7c9
|
3 |
+
size 5952
|
runs/Feb11_14-00-34_74e5e7b42358/events.out.tfevents.1676124921.74e5e7b42358.292.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e58a04a0e477b9d2ba2e1e319284b1a33bbd6d6814f990542656072fb5f45d84
|
3 |
+
size 14428
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3643
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77a4f7038b50ddc31edcc81a5b698eebafae24d0db54d6acae4be1a9f931706a
|
3 |
size 3643
|