Federic committed on
Commit
fcbb92d
1 Parent(s): b023bd9

Training in progress, step 350, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58bfc760e91b0d21d65d7923896fc95c4f25a4f944fedcb622d03de278369d4c
3
  size 838904832
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:420b8a326986f8e830cd838bbf34a7080a1e6d4ec3ce8653b2e4eebadcfa2c77
3
  size 838904832
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a938569403d232d9b7ce7418e5911974858cee98b87ac1d1130486b3b546ff4
3
  size 421458386
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7c1879fc8f1886e9f801904c72d7dbe4bf99bc2fd9a6c759591a82e966b8031
3
  size 421458386
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef8593826016a9c5e91074c43813975f98ccc4aaa6700967895ddedc6022efdd
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65a3ee4d233a8121789677494e2ecd1a75477024c32056ca4bbd20461a9ed78a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d0166e84a24bb5e8fbd3eec4a559ab0d784d0de1c9f1ce37bbb473bd77a0781
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32cd83ea7b3c3d26f4b1d83df062610c973ecff83b1326ee086a5f9f50324c14
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.3,
5
  "eval_steps": 500,
6
- "global_step": 325,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1957,13 +1957,163 @@
1957
  "learning_rate": 0.0002,
1958
  "loss": 0.4449,
1959
  "step": 325
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1960
  }
1961
  ],
1962
  "logging_steps": 1,
1963
  "max_steps": 500,
1964
  "num_train_epochs": 2,
1965
  "save_steps": 25,
1966
- "total_flos": 3.921061058162688e+16,
1967
  "trial_name": null,
1968
  "trial_params": null
1969
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.4,
5
  "eval_steps": 500,
6
+ "global_step": 350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1957
  "learning_rate": 0.0002,
1958
  "loss": 0.4449,
1959
  "step": 325
1960
+ },
1961
+ {
1962
+ "epoch": 1.3,
1963
+ "learning_rate": 0.0002,
1964
+ "loss": 0.3912,
1965
+ "step": 326
1966
+ },
1967
+ {
1968
+ "epoch": 1.31,
1969
+ "learning_rate": 0.0002,
1970
+ "loss": 0.4508,
1971
+ "step": 327
1972
+ },
1973
+ {
1974
+ "epoch": 1.31,
1975
+ "learning_rate": 0.0002,
1976
+ "loss": 0.4124,
1977
+ "step": 328
1978
+ },
1979
+ {
1980
+ "epoch": 1.32,
1981
+ "learning_rate": 0.0002,
1982
+ "loss": 0.4305,
1983
+ "step": 329
1984
+ },
1985
+ {
1986
+ "epoch": 1.32,
1987
+ "learning_rate": 0.0002,
1988
+ "loss": 0.4207,
1989
+ "step": 330
1990
+ },
1991
+ {
1992
+ "epoch": 1.32,
1993
+ "learning_rate": 0.0002,
1994
+ "loss": 0.3785,
1995
+ "step": 331
1996
+ },
1997
+ {
1998
+ "epoch": 1.33,
1999
+ "learning_rate": 0.0002,
2000
+ "loss": 0.428,
2001
+ "step": 332
2002
+ },
2003
+ {
2004
+ "epoch": 1.33,
2005
+ "learning_rate": 0.0002,
2006
+ "loss": 0.3683,
2007
+ "step": 333
2008
+ },
2009
+ {
2010
+ "epoch": 1.34,
2011
+ "learning_rate": 0.0002,
2012
+ "loss": 0.3742,
2013
+ "step": 334
2014
+ },
2015
+ {
2016
+ "epoch": 1.34,
2017
+ "learning_rate": 0.0002,
2018
+ "loss": 0.3734,
2019
+ "step": 335
2020
+ },
2021
+ {
2022
+ "epoch": 1.34,
2023
+ "learning_rate": 0.0002,
2024
+ "loss": 0.3748,
2025
+ "step": 336
2026
+ },
2027
+ {
2028
+ "epoch": 1.35,
2029
+ "learning_rate": 0.0002,
2030
+ "loss": 0.4496,
2031
+ "step": 337
2032
+ },
2033
+ {
2034
+ "epoch": 1.35,
2035
+ "learning_rate": 0.0002,
2036
+ "loss": 0.3368,
2037
+ "step": 338
2038
+ },
2039
+ {
2040
+ "epoch": 1.36,
2041
+ "learning_rate": 0.0002,
2042
+ "loss": 0.3801,
2043
+ "step": 339
2044
+ },
2045
+ {
2046
+ "epoch": 1.36,
2047
+ "learning_rate": 0.0002,
2048
+ "loss": 0.3133,
2049
+ "step": 340
2050
+ },
2051
+ {
2052
+ "epoch": 1.36,
2053
+ "learning_rate": 0.0002,
2054
+ "loss": 0.3159,
2055
+ "step": 341
2056
+ },
2057
+ {
2058
+ "epoch": 1.37,
2059
+ "learning_rate": 0.0002,
2060
+ "loss": 0.3227,
2061
+ "step": 342
2062
+ },
2063
+ {
2064
+ "epoch": 1.37,
2065
+ "learning_rate": 0.0002,
2066
+ "loss": 0.3268,
2067
+ "step": 343
2068
+ },
2069
+ {
2070
+ "epoch": 1.38,
2071
+ "learning_rate": 0.0002,
2072
+ "loss": 0.402,
2073
+ "step": 344
2074
+ },
2075
+ {
2076
+ "epoch": 1.38,
2077
+ "learning_rate": 0.0002,
2078
+ "loss": 0.327,
2079
+ "step": 345
2080
+ },
2081
+ {
2082
+ "epoch": 1.38,
2083
+ "learning_rate": 0.0002,
2084
+ "loss": 0.2898,
2085
+ "step": 346
2086
+ },
2087
+ {
2088
+ "epoch": 1.39,
2089
+ "learning_rate": 0.0002,
2090
+ "loss": 0.3437,
2091
+ "step": 347
2092
+ },
2093
+ {
2094
+ "epoch": 1.39,
2095
+ "learning_rate": 0.0002,
2096
+ "loss": 0.3099,
2097
+ "step": 348
2098
+ },
2099
+ {
2100
+ "epoch": 1.4,
2101
+ "learning_rate": 0.0002,
2102
+ "loss": 0.2742,
2103
+ "step": 349
2104
+ },
2105
+ {
2106
+ "epoch": 1.4,
2107
+ "learning_rate": 0.0002,
2108
+ "loss": 0.3122,
2109
+ "step": 350
2110
  }
2111
  ],
2112
  "logging_steps": 1,
2113
  "max_steps": 500,
2114
  "num_train_epochs": 2,
2115
  "save_steps": 25,
2116
+ "total_flos": 4.187771329769472e+16,
2117
  "trial_name": null,
2118
  "trial_params": null
2119
  }