AlekseyKorshuk committed on
Commit
79b47b3
1 Parent(s): bd84f82

huggingartists

README.md ADDED
@@ -0,0 +1,97 @@
+ ---
+ language: en
+ datasets:
+ - huggingartists/headie-one
+ tags:
+ - huggingartists
+ - lyrics
+ - lm-head
+ - causal-lm
+ widget:
+ - text: "I am"
+ ---
+
+ <div class="inline-flex flex-col" style="line-height: 1.5;">
+ <div class="flex">
+ <div
+ style="display:block; margin-left: auto; margin-right: auto; width: 92px; height:92px; border-radius: 50%; background-size: cover; background-image: url(&#39;https://images.genius.com/f803e312226f5034989742ff1fb4b583.1000x1000x1.jpg&#39;)">
+ </div>
+ </div>
+ <div style="text-align: center; margin-top: 3px; font-size: 16px; font-weight: 800">🤖 HuggingArtists Model 🤖</div>
+ <div style="text-align: center; font-size: 16px; font-weight: 800">Headie One</div>
+ <a href="https://genius.com/artists/headie-one">
+ <div style="text-align: center; font-size: 14px;">@headie-one</div>
+ </a>
+ </div>
+
+ I was made with [huggingartists](https://github.com/AlekseyKorshuk/huggingartists).
+
+ Create your own bot based on your favorite artist with [the demo](https://colab.research.google.com/github/AlekseyKorshuk/huggingartists/blob/master/huggingartists-demo.ipynb)!
+
+ ## How does it work?
+
+ To understand how the model was developed, check the [W&B report](https://wandb.ai/huggingartists/huggingartists/reportlist).
+
+ ## Training data
+
+ The model was trained on lyrics from Headie One.
+
+ The dataset is available [here](https://huggingface.co/datasets/huggingartists/headie-one) and can be loaded with:
+
+ ```python
+ from datasets import load_dataset
+
+ dataset = load_dataset("huggingartists/headie-one")
+ ```
+
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/x7sbsok3/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
+
+ ## Training procedure
+
+ The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2), fine-tuned on Headie One's lyrics.
+
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/23dok566) for full transparency and reproducibility.
+
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/23dok566/artifacts) is logged and versioned.
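+
+ Below is a minimal, schematic sketch of what this kind of fine-tuning looks like with the 🤗 `Trainer` API. It is illustrative only: the actual huggingartists training script lives in the project repository, and the batch size and learning rate shown here are assumptions, not the values used for this run.
+
+ ```python
+ from datasets import load_dataset
+ from transformers import (AutoModelForCausalLM, AutoTokenizer,
+                           DataCollatorForLanguageModeling, Trainer, TrainingArguments)
+
+ # Tokenize the lyrics as plain causal-LM text.
+ dataset = load_dataset("huggingartists/headie-one")
+ tokenizer = AutoTokenizer.from_pretrained("gpt2")
+ tokenizer.pad_token = tokenizer.eos_token  # GPT-2 has no pad token by default
+
+ def tokenize(batch):
+     return tokenizer(batch["text"], truncation=True, max_length=512)
+
+ tokenized = dataset["train"].map(tokenize, batched=True, remove_columns=["text"])
+
+ model = AutoModelForCausalLM.from_pretrained("gpt2")
+ collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
+
+ args = TrainingArguments(
+     output_dir="output/headie-one",
+     num_train_epochs=10,            # matches num_train_epochs in trainer_state.json
+     per_device_train_batch_size=8,  # illustrative assumption
+     learning_rate=1.37e-4,          # illustrative; the real run used a cyclical schedule
+     save_strategy="epoch",
+ )
+
+ trainer = Trainer(model=model, args=args, train_dataset=tokenized, data_collator=collator)
+ trainer.train()
+ ```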
+
+ ## How to use
+
+ You can use this model directly with a pipeline for text generation:
+
+ ```python
+ from transformers import pipeline
+ generator = pipeline('text-generation',
+                      model='huggingartists/headie-one')
+ generator("I am", num_return_sequences=5)
+ ```
+
+ Or with the Transformers library:
+
+ ```python
+ from transformers import AutoTokenizer, AutoModelWithLMHead
+
+ tokenizer = AutoTokenizer.from_pretrained("huggingartists/headie-one")
+
+ model = AutoModelWithLMHead.from_pretrained("huggingartists/headie-one")
+ ```
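+
+ The snippet above only loads the weights; to actually generate lyrics from them, a sketch along these lines should work (the prompt and sampling settings are just examples):
+
+ ```python
+ inputs = tokenizer("I am", return_tensors="pt")
+ outputs = model.generate(**inputs, do_sample=True, top_p=0.95, max_length=200)
+ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+ ```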
+
+ ## Limitations and bias
+
+ The model suffers from [the same limitations and bias as GPT-2](https://huggingface.co/gpt2#limitations-and-bias).
+
+ In addition, the data present in the artist's lyrics further affects the text generated by the model.
+
+ ## About
+
+ *Built by Aleksey Korshuk*
+
+ [![Follow](https://img.shields.io/github/followers/AlekseyKorshuk?style=social)](https://github.com/AlekseyKorshuk)
+
+ [![Follow](https://img.shields.io/twitter/follow/alekseykorshuk?style=social)](https://twitter.com/intent/follow?screen_name=alekseykorshuk)
+
+ [![Follow](https://img.shields.io/badge/dynamic/json?color=blue&label=Telegram%20Channel&query=%24.result&url=https%3A%2F%2Fapi.telegram.org%2Fbot1929545866%3AAAFGhV-KKnegEcLiyYJxsc4zV6C-bdPEBtQ%2FgetChatMemberCount%3Fchat_id%3D-1001253621662&style=social&logo=telegram)](https://t.me/joinchat/_CQ04KjcJ-4yZTky)
+
+ For more details, visit the project repository.
+
+ [![GitHub stars](https://img.shields.io/github/stars/AlekseyKorshuk/huggingartists?style=social)](https://github.com/AlekseyKorshuk/huggingartists)
config.json ADDED
@@ -0,0 +1,42 @@
+ {
+   "_name_or_path": "headie-one",
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 50256,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 50256,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 1024,
+   "n_embd": 768,
+   "n_head": 12,
+   "n_inner": null,
+   "n_layer": 12,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "task_specific_params": {
+     "text-generation": {
+       "do_sample": true,
+       "max_length": 200,
+       "min_length": 100,
+       "temperature": 1.0,
+       "top_p": 0.95
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.20.0",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
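The `task_specific_params` block above stores the default sampling settings for generation (nucleus sampling with `top_p=0.95`, 100–200 tokens). As a rough sketch, these defaults can be read back from the config and passed to `generate()` explicitly:

```python
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

config = AutoConfig.from_pretrained("huggingartists/headie-one")
gen_kwargs = config.task_specific_params["text-generation"]  # do_sample, max_length, min_length, temperature, top_p

tokenizer = AutoTokenizer.from_pretrained("huggingartists/headie-one")
model = AutoModelForCausalLM.from_pretrained("huggingartists/headie-one")

inputs = tokenizer("I am", return_tensors="pt")
print(tokenizer.decode(model.generate(**inputs, **gen_kwargs)[0], skip_special_tokens=True))
```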
evaluation.txt ADDED
@@ -0,0 +1 @@
+ {"eval_loss": 1.6784825325012207, "eval_runtime": 1.6409, "eval_samples_per_second": 40.831, "eval_steps_per_second": 5.485, "epoch": 10.0}
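Assuming `eval_loss` is the usual mean token-level cross-entropy (natural log) reported by the 🤗 Trainer, the corresponding validation perplexity is simply its exponential, roughly 5.4:

```python
import math

eval_loss = 1.6784825325012207  # from evaluation.txt
print(math.exp(eval_loss))      # ~5.36, validation perplexity
```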
flax_model.msgpack ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fecd751b5700963f44c2ee402ff0b7a4c67e61624a859a934f710eb191c45a74
+ size 497764120
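Since the repository ships Flax weights (`flax_model.msgpack`) alongside the PyTorch checkpoint, the model should also be loadable in JAX/Flax. An untested sketch:

```python
from transformers import AutoTokenizer, FlaxGPT2LMHeadModel

tokenizer = AutoTokenizer.from_pretrained("huggingartists/headie-one")
model = FlaxGPT2LMHeadModel.from_pretrained("huggingartists/headie-one")

inputs = tokenizer("I am", return_tensors="np")
outputs = model.generate(inputs["input_ids"], do_sample=True, max_length=50)
print(tokenizer.decode(outputs.sequences[0], skip_special_tokens=True))
```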
headie-one.py ADDED
@@ -0,0 +1,107 @@
+ # coding=utf-8
+ # Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Lyrics dataset parsed from Genius"""
+
+
+ import csv
+ import json
+ import os
+ import gzip
+
+ import datasets
+
+
+ _CITATION = """\
+ @InProceedings{huggingartists:dataset,
+ title = {Lyrics dataset},
+ author={Aleksey Korshuk
+ },
+ year={2021}
+ }
+ """
+
+
+ _DESCRIPTION = """\
+ This dataset is designed to generate lyrics with HuggingArtists.
+ """
+
+ # Add a link to an official homepage for the dataset here
+ _HOMEPAGE = "https://github.com/AlekseyKorshuk/huggingartists"
+
+ # Add the licence for the dataset here if you can find it
+ _LICENSE = "All rights belong to copyright holders"
+
+ _URL = "https://huggingface.co/datasets/huggingartists/headie-one/resolve/main/datasets.json"
+
+ # Name of the dataset
+ class LyricsDataset(datasets.GeneratorBasedBuilder):
+     """Lyrics dataset"""
+
+     VERSION = datasets.Version("1.0.0")
+
+     def _info(self):
+         # This method specifies the datasets.DatasetInfo object, which contains the information and typings for the dataset
+         features = datasets.Features(
+             {
+                 "text": datasets.Value("string"),
+             }
+         )
+         return datasets.DatasetInfo(
+             # This is the description that will appear on the datasets page.
+             description=_DESCRIPTION,
+             # This defines the different columns of the dataset and their types
+             features=features,  # Here we define them above because they are different between the two configurations
+             # If there's a common (input, target) tuple from the features,
+             # specify them here. They'll be used if as_supervised=True in
+             # builder.as_dataset.
+             supervised_keys=None,
+             # Homepage of the dataset for documentation
+             homepage=_HOMEPAGE,
+             # License for the dataset if available
+             license=_LICENSE,
+             # Citation for the dataset
+             citation=_CITATION,
+         )
+
+     def _split_generators(self, dl_manager):
+         """Returns SplitGenerators."""
+         # This method is tasked with downloading/extracting the data and defining the splits depending on the configuration
+         # If several configurations are possible (listed in BUILDER_CONFIGS), the configuration selected by the user is in self.config.name
+
+         # dl_manager is a datasets.download.DownloadManager that can be used to download and extract URLs
+         # It can accept any type or nested list/dict and will give back the same structure with the URLs replaced with paths to local files.
+         # By default the archives will be extracted and a path to a cached folder where they are extracted is returned instead of the archive
+
+         data_dir = dl_manager.download_and_extract(_URL)
+         return [
+             datasets.SplitGenerator(
+                 name=datasets.Split.TRAIN,
+                 # These kwargs will be passed to _generate_examples
+                 gen_kwargs={
+                     "filepath": data_dir,
+                     "split": "train",
+                 },
+             ),
+         ]
+
+     def _generate_examples(self, filepath, split):
+         """Yields examples as (key, example) tuples."""
+         # This method handles input defined in _split_generators to yield (key, example) tuples from the dataset.
+
+         with open(filepath, encoding="utf-8") as f:
+             data = json.load(f)
+         for id, pred in enumerate(data[split]):
+             yield id, {"text": pred}
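The script above exposes a single `train` split with one string field, `text`. A quick sketch for inspecting the loaded data:

```python
from datasets import load_dataset

dataset = load_dataset("huggingartists/headie-one")
print(dataset)                            # DatasetDict with a single "train" split
print(dataset["train"].features)          # {'text': Value(dtype='string', ...)}
print(dataset["train"][0]["text"][:200])  # first 200 characters of the first song
```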
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2d097a87db35207ad3d05130da50e9ebb55fa07513c0df666ca52f27cb06b358
+ size 995604017
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:90d824a120c62c3e83dd6a73c662684e5dbd1b4b707b29c9ebf5e0acf99da375
+ size 510396521
rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:66929c1ab9e7989bb184f169bf7f2442be88e09dfd4b503a328757b1b59345f6
+ size 14567
scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:48279aa48ac21cf8f06ef968c16ddaba90d867b569da938adf4544c7aea84cf0
+ size 623
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
+ {
+   "bos_token": "<|endoftext|>",
+   "eos_token": "<|endoftext|>",
+   "unk_token": "<|endoftext|>"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "add_prefix_space": false,
+   "bos_token": "<|endoftext|>",
+   "eos_token": "<|endoftext|>",
+   "model_max_length": 1024,
+   "name_or_path": "gpt2",
+   "special_tokens_map_file": null,
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": "<|endoftext|>"
+ }
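As `special_tokens_map.json` and `tokenizer_config.json` show, this is the standard GPT-2 byte-level BPE tokenizer with `<|endoftext|>` reused for BOS, EOS, and UNK. A quick sanity check:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("huggingartists/headie-one")
print(tokenizer.bos_token, tokenizer.eos_token, tokenizer.unk_token)  # all "<|endoftext|>"
print(tokenizer.model_max_length)  # 1024
print(tokenizer.vocab_size)        # 50257
```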
trainer_state.json ADDED
@@ -0,0 +1,592 @@
+ {
+   "best_metric": 1.6784825325012207,
+   "best_model_checkpoint": "output/headie-one/checkpoint-423",
+   "epoch": 9.0,
+   "global_step": 423,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {"epoch": 0.11, "learning_rate": 0.00013340429968430322, "loss": 3.3897, "step": 5},
+     {"epoch": 0.21, "learning_rate": 0.00012243723783011615, "loss": 3.2804, "step": 10},
+     {"epoch": 0.32, "learning_rate": 0.00010551244942700737, "loss": 3.1867, "step": 15},
+     {"epoch": 0.43, "learning_rate": 8.450286230835245e-05, "loss": 3.1078, "step": 20},
+     {"epoch": 0.53, "learning_rate": 6.173343555458685e-05, "loss": 2.9997, "step": 25},
+     {"epoch": 0.64, "learning_rate": 3.972387529741623e-05, "loss": 3.037, "step": 30},
+     {"epoch": 0.74, "learning_rate": 2.090979946151954e-05, "loss": 2.8067, "step": 35},
+     {"epoch": 0.85, "learning_rate": 7.373207860012155e-06, "loss": 2.6461, "step": 40},
+     {"epoch": 0.96, "learning_rate": 6.1208431258209e-07, "loss": 2.7311, "step": 45},
+     {"epoch": 1.0, "eval_loss": 2.646137237548828, "eval_runtime": 1.4447, "eval_samples_per_second": 46.378, "eval_steps_per_second": 6.23, "step": 47},
+     {"epoch": 1.06, "learning_rate": 1.3746270344901413e-06, "loss": 2.8414, "step": 50},
+     {"epoch": 1.17, "learning_rate": 9.576451662754438e-06, "loss": 2.7047, "step": 55},
+     {"epoch": 1.28, "learning_rate": 2.4309929383066146e-05, "loss": 2.7129, "step": 60},
+     {"epoch": 1.38, "learning_rate": 4.3944626783346644e-05, "loss": 2.7639, "step": 65},
+     {"epoch": 1.49, "learning_rate": 6.630773257727353e-05, "loss": 2.67, "step": 70},
+     {"epoch": 1.6, "learning_rate": 8.892450484875447e-05, "loss": 2.4349, "step": 75},
+     {"epoch": 1.7, "learning_rate": 0.00010929213048843373, "loss": 2.6474, "step": 80},
+     {"epoch": 1.81, "learning_rate": 0.00012515669103944476, "loss": 2.741, "step": 85},
+     {"epoch": 1.91, "learning_rate": 0.00013476258540873022, "loss": 2.5921, "step": 90},
+     {"epoch": 2.0, "eval_loss": 2.3871500492095947, "eval_runtime": 1.4491, "eval_samples_per_second": 46.235, "eval_steps_per_second": 6.211, "step": 94},
+     {"epoch": 2.02, "learning_rate": 0.00013704680787354832, "loss": 2.4265, "step": 95},
+     {"epoch": 2.13, "learning_rate": 0.00013175658222600302, "loss": 2.5165, "step": 100},
+     {"epoch": 2.23, "learning_rate": 0.00011947733444744994, "loss": 2.1946, "step": 105},
+     {"epoch": 2.34, "learning_rate": 0.0001015679084058065, "loss": 2.3182, "step": 110},
+     {"epoch": 2.45, "learning_rate": 8.001019372440279e-05, "loss": 2.2781, "step": 115},
+     {"epoch": 2.55, "learning_rate": 5.718980627559731e-05, "loss": 2.2824, "step": 120},
+     {"epoch": 2.66, "learning_rate": 3.563209159419354e-05, "loss": 2.2139, "step": 125},
+     {"epoch": 2.77, "learning_rate": 1.772266555255008e-05, "loss": 2.3554, "step": 130},
+     {"epoch": 2.87, "learning_rate": 5.443417773996978e-06, "loss": 2.1634, "step": 135},
+     {"epoch": 2.98, "learning_rate": 1.5319212645169297e-07, "loss": 2.2246, "step": 140},
+     {"epoch": 3.0, "eval_loss": 2.2159550189971924, "eval_runtime": 1.505, "eval_samples_per_second": 44.519, "eval_steps_per_second": 5.98, "step": 141},
+     {"epoch": 3.09, "learning_rate": 2.4374145912697595e-06, "loss": 2.0033, "step": 145},
+     {"epoch": 3.19, "learning_rate": 1.204330896055522e-05, "loss": 2.1305, "step": 150},
+     {"epoch": 3.3, "learning_rate": 2.790786951156628e-05, "loss": 2.0744, "step": 155},
+     {"epoch": 3.4, "learning_rate": 4.8275495151245426e-05, "loss": 2.1473, "step": 160},
+     {"epoch": 3.51, "learning_rate": 7.089226742272638e-05, "loss": 2.2506, "step": 165},
+     {"epoch": 3.62, "learning_rate": 9.325537321665337e-05, "loss": 2.157, "step": 170},
+     {"epoch": 3.72, "learning_rate": 0.00011289007061693382, "loss": 2.1313, "step": 175},
+     {"epoch": 3.83, "learning_rate": 0.00012762354833724553, "loss": 2.0593, "step": 180},
+     {"epoch": 3.94, "learning_rate": 0.00013582537296550986, "loss": 2.0425, "step": 185},
+     {"epoch": 4.0, "eval_loss": 2.1005759239196777, "eval_runtime": 1.5665, "eval_samples_per_second": 42.771, "eval_steps_per_second": 5.745, "step": 188},
+     {"epoch": 4.04, "learning_rate": 0.0001365879156874179, "loss": 1.9915, "step": 190},
+     {"epoch": 4.15, "learning_rate": 0.00012982679213998792, "loss": 1.8713, "step": 195},
+     {"epoch": 4.26, "learning_rate": 0.00011629020053848047, "loss": 1.8176, "step": 200},
+     {"epoch": 4.36, "learning_rate": 9.747612470258382e-05, "loss": 1.8704, "step": 205},
+     {"epoch": 4.47, "learning_rate": 7.546656444541333e-05, "loss": 1.9435, "step": 210},
+     {"epoch": 4.57, "learning_rate": 5.2697137691647635e-05, "loss": 2.0704, "step": 215},
+     {"epoch": 4.68, "learning_rate": 3.1687550572992616e-05, "loss": 2.0372, "step": 220},
+     {"epoch": 4.79, "learning_rate": 1.4762762169883855e-05, "loss": 1.9286, "step": 225},
+     {"epoch": 4.89, "learning_rate": 3.795700315696817e-06, "loss": 1.7845, "step": 230},
+     {"epoch": 5.0, "learning_rate": 0.0, "loss": 1.7819, "step": 235},
+     {"epoch": 5.0, "eval_loss": 1.9693368673324585, "eval_runtime": 1.5591, "eval_samples_per_second": 42.973, "eval_steps_per_second": 5.772, "step": 235},
+     {"epoch": 5.11, "learning_rate": 3.7957003156967485e-06, "loss": 1.6773, "step": 240},
+     {"epoch": 5.21, "learning_rate": 1.4762762169883802e-05, "loss": 1.9066, "step": 245},
+     {"epoch": 5.32, "learning_rate": 3.168755057299255e-05, "loss": 1.7702, "step": 250},
+     {"epoch": 5.43, "learning_rate": 5.269713769164743e-05, "loss": 1.7718, "step": 255},
+     {"epoch": 5.53, "learning_rate": 7.546656444541325e-05, "loss": 1.6923, "step": 260},
+     {"epoch": 5.64, "learning_rate": 9.747612470258363e-05, "loss": 1.8134, "step": 265},
+     {"epoch": 5.74, "learning_rate": 0.0001162902005384805, "loss": 1.6943, "step": 270},
+     {"epoch": 5.85, "learning_rate": 0.00012982679213998787, "loss": 1.7474, "step": 275},
+     {"epoch": 5.96, "learning_rate": 0.00013658791568741792, "loss": 1.664, "step": 280},
+     {"epoch": 6.0, "eval_loss": 1.8830074071884155, "eval_runtime": 1.6028, "eval_samples_per_second": 41.801, "eval_steps_per_second": 5.615, "step": 282},
+     {"epoch": 6.06, "learning_rate": 0.00013582537296550986, "loss": 1.7593, "step": 285},
+     {"epoch": 6.17, "learning_rate": 0.00012762354833724559, "loss": 1.68, "step": 290},
+     {"epoch": 6.28, "learning_rate": 0.0001128900706169339, "loss": 1.5629, "step": 295},
+     {"epoch": 6.38, "learning_rate": 9.325537321665346e-05, "loss": 1.5448, "step": 300},
+     {"epoch": 6.49, "learning_rate": 7.089226742272658e-05, "loss": 1.5643, "step": 305},
+     {"epoch": 6.6, "learning_rate": 4.827549515124539e-05, "loss": 1.5585, "step": 310},
+     {"epoch": 6.7, "learning_rate": 2.7907869511566348e-05, "loss": 1.724, "step": 315},
+     {"epoch": 6.81, "learning_rate": 1.2043308960555334e-05, "loss": 1.3896, "step": 320},
+     {"epoch": 6.91, "learning_rate": 2.437414591269752e-06, "loss": 1.4656, "step": 325},
+     {"epoch": 7.0, "eval_loss": 1.7848814725875854, "eval_runtime": 1.6359, "eval_samples_per_second": 40.956, "eval_steps_per_second": 5.502, "step": 329},
+     {"epoch": 7.02, "learning_rate": 1.5319212645167772e-07, "loss": 1.4519, "step": 330},
+     {"epoch": 7.13, "learning_rate": 5.443417773996994e-06, "loss": 1.5894, "step": 335},
+     {"epoch": 7.23, "learning_rate": 1.772266555255011e-05, "loss": 1.377, "step": 340},
+     {"epoch": 7.34, "learning_rate": 3.563209159419346e-05, "loss": 1.4357, "step": 345},
+     {"epoch": 7.45, "learning_rate": 5.718980627559723e-05, "loss": 1.5269, "step": 350},
+     {"epoch": 7.55, "learning_rate": 8.001019372440265e-05, "loss": 1.4501, "step": 355},
+     {"epoch": 7.66, "learning_rate": 0.00010156790840580641, "loss": 1.2612, "step": 360},
+     {"epoch": 7.77, "learning_rate": 0.0001194773344474498, "loss": 1.3453, "step": 365},
+     {"epoch": 7.87, "learning_rate": 0.00013175658222600294, "loss": 1.4728, "step": 370},
+     {"epoch": 7.98, "learning_rate": 0.00013704680787354832, "loss": 1.4687, "step": 375},
+     {"epoch": 8.0, "eval_loss": 1.76621675491333, "eval_runtime": 1.6517, "eval_samples_per_second": 40.565, "eval_steps_per_second": 5.449, "step": 376},
+     {"epoch": 8.09, "learning_rate": 0.00013476258540873022, "loss": 1.4856, "step": 380},
+     {"epoch": 8.19, "learning_rate": 0.00012515669103944473, "loss": 1.2361, "step": 385},
+     {"epoch": 8.3, "learning_rate": 0.00010929213048843395, "loss": 1.1886, "step": 390},
+     {"epoch": 8.4, "learning_rate": 8.892450484875472e-05, "loss": 1.3658, "step": 395},
+     {"epoch": 8.51, "learning_rate": 6.630773257727356e-05, "loss": 1.3489, "step": 400},
+     {"epoch": 8.62, "learning_rate": 4.394462678334666e-05, "loss": 1.2336, "step": 405},
+     {"epoch": 8.72, "learning_rate": 2.4309929383066207e-05, "loss": 1.2826, "step": 410},
+     {"epoch": 8.83, "learning_rate": 9.576451662754362e-06, "loss": 1.381, "step": 415},
+     {"epoch": 8.94, "learning_rate": 1.3746270344902175e-06, "loss": 1.2151, "step": 420},
+     {"epoch": 9.0, "eval_loss": 1.6784825325012207, "eval_runtime": 1.678, "eval_samples_per_second": 39.927, "eval_steps_per_second": 5.363, "step": 423}
+   ],
+   "max_steps": 470,
+   "num_train_epochs": 10,
+   "total_flos": 439754489856000.0,
+   "trial_name": null,
+   "trial_params": null
+ }
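`trainer_state.json` keeps the full training log, so the loss curve and the per-epoch eval losses can be reconstructed locally. A small sketch, assuming the file has been downloaded next to the script:

```python
import json
import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Split the log into training-loss and eval-loss records.
train = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
evals = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]

plt.plot(*zip(*train), label="train loss")
plt.plot(*zip(*evals), "o-", label="eval loss")
plt.xlabel("step")
plt.ylabel("loss")
plt.legend()
plt.show()
```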
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aaa944c20cbf502e00ccd36c3c1dbba26621b29241ccaca9e30360a750f15bf5
+ size 3311
vocab.json ADDED
The diff for this file is too large to render. See raw diff