AlekseyKorshuk
commited on
Commit
•
940b328
1
Parent(s):
0e73214
huggingartists
Browse files- README.md +3 -3
- evaluation.txt +1 -1
- flax_model.msgpack +1 -1
- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +152 -6
- training_args.bin +1 -1
README.md
CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
|
|
45 |
dataset = load_dataset("huggingartists/drake")
|
46 |
```
|
47 |
|
48 |
-
[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/
|
49 |
|
50 |
## Training procedure
|
51 |
|
52 |
The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Drake's lyrics.
|
53 |
|
54 |
-
Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/
|
55 |
|
56 |
-
At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/
|
57 |
|
58 |
## How to use
|
59 |
|
|
|
45 |
dataset = load_dataset("huggingartists/drake")
|
46 |
```
|
47 |
|
48 |
+
[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2e42ok17/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
|
49 |
|
50 |
## Training procedure
|
51 |
|
52 |
The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Drake's lyrics.
|
53 |
|
54 |
+
Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/2xe72oq3) for full transparency and reproducibility.
|
55 |
|
56 |
+
At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/2xe72oq3/artifacts) is logged and versioned.
|
57 |
|
58 |
## How to use
|
59 |
|
evaluation.txt
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"eval_loss": 2.
|
|
|
1 |
+
{"eval_loss": 2.6586830615997314, "eval_runtime": 12.618, "eval_samples_per_second": 43.906, "eval_steps_per_second": 5.548, "epoch": 6.0}
|
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 497764120
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bb6ff1968e33bafe123ccca386bcb5b25cdd34682f12236ab1c25d04630b79f6
|
3 |
size 497764120
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 995604017
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:de10120cd339dc8a2411ec8e4b7e1d4af7983ab863a59e4fb699101f393fde92
|
3 |
size 995604017
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 510396521
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4cce53ebbc5d19a5a3d4fe01b3d7363a3a0c4f9cb9bdafea201bad39efd63457
|
3 |
size 510396521
|
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14567
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:326cc7de00c3f20c4fcf0338fdce1a6ac81da32e3abb8094e79488b6be2ca9fb
|
3 |
size 14567
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ae78cbe0368ef751528f9df25bdcc580eaf09b6340910f9e0e6aae733d99451
|
3 |
size 623
|
trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"best_metric": 2.
|
3 |
-
"best_model_checkpoint": "output/drake/checkpoint-
|
4 |
"epoch": 5.0,
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -2006,11 +2006,157 @@
|
|
2006 |
"eval_samples_per_second": 142.792,
|
2007 |
"eval_steps_per_second": 18.118,
|
2008 |
"step": 1640
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2009 |
}
|
2010 |
],
|
2011 |
-
"max_steps":
|
2012 |
-
"num_train_epochs":
|
2013 |
-
"total_flos":
|
2014 |
"trial_name": null,
|
2015 |
"trial_params": null
|
2016 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 2.6586830615997314,
|
3 |
+
"best_model_checkpoint": "output/drake/checkpoint-1755",
|
4 |
"epoch": 5.0,
|
5 |
+
"global_step": 1755,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
2006 |
"eval_samples_per_second": 142.792,
|
2007 |
"eval_steps_per_second": 18.118,
|
2008 |
"step": 1640
|
2009 |
+
},
|
2010 |
+
{
|
2011 |
+
"epoch": 4.69,
|
2012 |
+
"learning_rate": 3.064756856761554e-05,
|
2013 |
+
"loss": 2.7681,
|
2014 |
+
"step": 1645
|
2015 |
+
},
|
2016 |
+
{
|
2017 |
+
"epoch": 4.7,
|
2018 |
+
"learning_rate": 2.8129056814413798e-05,
|
2019 |
+
"loss": 2.7734,
|
2020 |
+
"step": 1650
|
2021 |
+
},
|
2022 |
+
{
|
2023 |
+
"epoch": 4.72,
|
2024 |
+
"learning_rate": 2.5691584488661662e-05,
|
2025 |
+
"loss": 2.8042,
|
2026 |
+
"step": 1655
|
2027 |
+
},
|
2028 |
+
{
|
2029 |
+
"epoch": 4.73,
|
2030 |
+
"learning_rate": 2.3340032409686368e-05,
|
2031 |
+
"loss": 2.8781,
|
2032 |
+
"step": 1660
|
2033 |
+
},
|
2034 |
+
{
|
2035 |
+
"epoch": 4.74,
|
2036 |
+
"learning_rate": 2.1079109349241507e-05,
|
2037 |
+
"loss": 2.8162,
|
2038 |
+
"step": 1665
|
2039 |
+
},
|
2040 |
+
{
|
2041 |
+
"epoch": 4.76,
|
2042 |
+
"learning_rate": 1.891334260261341e-05,
|
2043 |
+
"loss": 2.523,
|
2044 |
+
"step": 1670
|
2045 |
+
},
|
2046 |
+
{
|
2047 |
+
"epoch": 4.77,
|
2048 |
+
"learning_rate": 1.6847068923119758e-05,
|
2049 |
+
"loss": 2.8495,
|
2050 |
+
"step": 1675
|
2051 |
+
},
|
2052 |
+
{
|
2053 |
+
"epoch": 4.79,
|
2054 |
+
"learning_rate": 1.4884425838151134e-05,
|
2055 |
+
"loss": 2.791,
|
2056 |
+
"step": 1680
|
2057 |
+
},
|
2058 |
+
{
|
2059 |
+
"epoch": 4.8,
|
2060 |
+
"learning_rate": 1.3029343364143164e-05,
|
2061 |
+
"loss": 2.6161,
|
2062 |
+
"step": 1685
|
2063 |
+
},
|
2064 |
+
{
|
2065 |
+
"epoch": 4.81,
|
2066 |
+
"learning_rate": 1.1285536137072603e-05,
|
2067 |
+
"loss": 2.8099,
|
2068 |
+
"step": 1690
|
2069 |
+
},
|
2070 |
+
{
|
2071 |
+
"epoch": 4.83,
|
2072 |
+
"learning_rate": 9.65649597423201e-06,
|
2073 |
+
"loss": 2.9416,
|
2074 |
+
"step": 1695
|
2075 |
+
},
|
2076 |
+
{
|
2077 |
+
"epoch": 4.84,
|
2078 |
+
"learning_rate": 8.145484882179486e-06,
|
2079 |
+
"loss": 2.881,
|
2080 |
+
"step": 1700
|
2081 |
+
},
|
2082 |
+
{
|
2083 |
+
"epoch": 4.86,
|
2084 |
+
"learning_rate": 6.755528524864052e-06,
|
2085 |
+
"loss": 2.4485,
|
2086 |
+
"step": 1705
|
2087 |
+
},
|
2088 |
+
{
|
2089 |
+
"epoch": 4.87,
|
2090 |
+
"learning_rate": 5.4894101650047195e-06,
|
2091 |
+
"loss": 2.7043,
|
2092 |
+
"step": 1710
|
2093 |
+
},
|
2094 |
+
{
|
2095 |
+
"epoch": 4.89,
|
2096 |
+
"learning_rate": 4.349665090857373e-06,
|
2097 |
+
"loss": 2.9465,
|
2098 |
+
"step": 1715
|
2099 |
+
},
|
2100 |
+
{
|
2101 |
+
"epoch": 4.9,
|
2102 |
+
"learning_rate": 3.3385755395280496e-06,
|
2103 |
+
"loss": 2.8094,
|
2104 |
+
"step": 1720
|
2105 |
+
},
|
2106 |
+
{
|
2107 |
+
"epoch": 4.91,
|
2108 |
+
"learning_rate": 2.4581661269975954e-06,
|
2109 |
+
"loss": 2.8454,
|
2110 |
+
"step": 1725
|
2111 |
+
},
|
2112 |
+
{
|
2113 |
+
"epoch": 4.93,
|
2114 |
+
"learning_rate": 1.7101997940107123e-06,
|
2115 |
+
"loss": 2.717,
|
2116 |
+
"step": 1730
|
2117 |
+
},
|
2118 |
+
{
|
2119 |
+
"epoch": 4.94,
|
2120 |
+
"learning_rate": 1.0961742759449984e-06,
|
2121 |
+
"loss": 2.6812,
|
2122 |
+
"step": 1735
|
2123 |
+
},
|
2124 |
+
{
|
2125 |
+
"epoch": 4.96,
|
2126 |
+
"learning_rate": 6.173191037306866e-07,
|
2127 |
+
"loss": 2.7196,
|
2128 |
+
"step": 1740
|
2129 |
+
},
|
2130 |
+
{
|
2131 |
+
"epoch": 4.97,
|
2132 |
+
"learning_rate": 2.7459314182545106e-07,
|
2133 |
+
"loss": 2.8512,
|
2134 |
+
"step": 1745
|
2135 |
+
},
|
2136 |
+
{
|
2137 |
+
"epoch": 4.99,
|
2138 |
+
"learning_rate": 6.868266817434572e-08,
|
2139 |
+
"loss": 2.8381,
|
2140 |
+
"step": 1750
|
2141 |
+
},
|
2142 |
+
{
|
2143 |
+
"epoch": 5.0,
|
2144 |
+
"learning_rate": 0.0,
|
2145 |
+
"loss": 2.7245,
|
2146 |
+
"step": 1755
|
2147 |
+
},
|
2148 |
+
{
|
2149 |
+
"epoch": 5.0,
|
2150 |
+
"eval_loss": 2.6586830615997314,
|
2151 |
+
"eval_runtime": 12.7857,
|
2152 |
+
"eval_samples_per_second": 43.33,
|
2153 |
+
"eval_steps_per_second": 5.475,
|
2154 |
+
"step": 1755
|
2155 |
}
|
2156 |
],
|
2157 |
+
"max_steps": 2106,
|
2158 |
+
"num_train_epochs": 6,
|
2159 |
+
"total_flos": 1831003914240000.0,
|
2160 |
"trial_name": null,
|
2161 |
"trial_params": null
|
2162 |
}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3311
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b14d0e1bec474f2c0b3b9e74e87729a4504a63316afeed249b70817f8d53a13
|
3 |
size 3311
|