kejian committed on Mar 19, 2023

Commit

ad44e12

•

1 Parent(s): 5fdcb10

Training in progress, step 1334

Browse files

Files changed (21) hide show

checkpoint-1334/config.json +39 -0
checkpoint-1334/merges.txt +0 -0
checkpoint-1334/optimizer.pt +3 -0
checkpoint-1334/pytorch_model.bin +3 -0
checkpoint-1334/rng_state.pth +3 -0
checkpoint-1334/scaler.pt +3 -0
checkpoint-1334/scheduler.pt +3 -0
checkpoint-1334/special_tokens_map.json +6 -0
checkpoint-1334/tokenizer.json +0 -0
checkpoint-1334/tokenizer_config.json +10 -0
checkpoint-1334/trainer_state.json +425 -0
checkpoint-1334/training_args.bin +3 -0
checkpoint-1334/vocab.json +0 -0
config.json +39 -0
merges.txt +0 -0
pytorch_model.bin +3 -0
special_tokens_map.json +6 -0
tokenizer.json +0 -0
tokenizer_config.json +10 -0
training_args.bin +3 -0
vocab.json +0 -0

checkpoint-1334/config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "_name_or_path": "gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMAndValueHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": true,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.23.0",
+  "use_cache": true,
+  "vocab_size": 50257
+}

checkpoint-1334/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-1334/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:979d1530027db31f9203c7933c3402262eb346b832d52933bc497bbd9b7b1eb5
+size 995611909

checkpoint-1334/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:18526a5d91cde5bfc2d5b87457cf22c4434382b1dffb1425c83b153e956c3c94
+size 510401409

checkpoint-1334/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e7c2abaeca8b39188ef4d024976e6b313f1edff37e845008546436359f70d704
+size 15533

checkpoint-1334/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a5970b634b76e24683de44b37ff55f568f86fe0760701f4b9ffa126dee06d439
+size 557

checkpoint-1334/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1ef515473b5a52626fce1bd33c4fadf38c02a6a297eb95e59e62ff42672e042d
+size 627

checkpoint-1334/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "pad_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}

checkpoint-1334/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-1334/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "add_prefix_space": false,
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 1024,
+  "name_or_path": "gpt2",
+  "special_tokens_map_file": null,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}

checkpoint-1334/trainer_state.json ADDED Viewed

	@@ -0,0 +1,425 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.49962546816479403,
+  "global_step": 1334,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.7037037037037037e-05,
+      "loss": 5.8178,
+      "theoretical_loss": 10.87642657795271,
+      "tokens_seen": 1048576
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.000991297767688233,
+      "loss": 3.8434,
+      "theoretical_loss": 5.240583117265738,
+      "tokens_seen": 52428800
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.000972379871358305,
+      "loss": 3.2483,
+      "theoretical_loss": 4.741048233458233,
+      "tokens_seen": 104857600
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.0009534619750283768,
+      "loss": 3.0804,
+      "theoretical_loss": 4.490755246681026,
+      "tokens_seen": 157286400
+    },
+    {
+      "epoch": 0.06,
+      "objective/train/advantage_avg": -0.008124944753944874,
+      "objective/train/docs_used": 104000,
+      "objective/train/instantaneous_batch_size": 32,
+      "objective/train/instantaneous_microbatch_size": 32768,
+      "objective/train/lm_loss": 5.698267936706543,
+      "objective/train/original_loss": 5.698267936706543,
+      "objective/train/theoretical_loss": 4.467094755136979,
+      "objective/train/tokens_used": 184300000,
+      "objective/train/value_avg": -0.0197296142578125,
+      "objective/train/value_loss": 0.00596056692302227,
+      "objective/train/value_max": -0.0023326873779296875,
+      "objective/train/value_min": -0.1529541015625,
+      "objective/train/value_reward_corr": 0.23002298967484386,
+      "objective/train/value_std": 0.01715087890625,
+      "objective/train/weight_avg": 0.9992169737815857,
+      "objective/train/weighted_lm_loss": 5.693380355834961,
+      "objective/train/weights_max": 1.01529061794281,
+      "objective/train/weights_min": 0.9343342781066895,
+      "theoretical_loss": 4.467094755136979,
+      "tokens_seen": 163840000
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 0.0009345440786984487,
+      "loss": 2.8679,
+      "theoretical_loss": 4.3296357903425715,
+      "tokens_seen": 209715200
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.0009156261823685207,
+      "loss": 2.7211,
+      "theoretical_loss": 4.213299841239684,
+      "tokens_seen": 262144000
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.0008967082860385925,
+      "loss": 2.5736,
+      "theoretical_loss": 4.123496734747793,
+      "tokens_seen": 314572800
+    },
+    {
+      "debugging/Self-BLEU-5": 0.5365128506817183,
+      "debugging/distinct-1-grams": 0.7612814402327299,
+      "debugging/distinct-2-grams": 0.9694583753853511,
+      "debugging/entropy-1-grams": 6.003629944255698,
+      "debugging/entropy-2-grams": 7.054987089269872,
+      "debugging/length": 495.25,
+      "debugging/num_segments": 16,
+      "debugging/raw_token_scores_avg": 0.04385810345411301,
+      "debugging/raw_token_scores_std": 0.15687797963619232,
+      "epoch": 0.12,
+      "objective/train/advantage_avg": -0.026558605954051018,
+      "objective/train/docs_used": 197327,
+      "objective/train/instantaneous_batch_size": 32,
+      "objective/train/instantaneous_microbatch_size": 32768,
+      "objective/train/lm_loss": 4.860468864440918,
+      "objective/train/original_loss": 4.860468864440918,
+      "objective/train/theoretical_loss": 4.10401016644798,
+      "objective/train/tokens_used": 348140000,
+      "objective/train/value_avg": -0.01727294921875,
+      "objective/train/value_loss": 0.02569347620010376,
+      "objective/train/value_max": -0.0023593902587890625,
+      "objective/train/value_min": -0.274169921875,
+      "objective/train/value_reward_corr": -0.0460843086754045,
+      "objective/train/value_std": 0.01389312744140625,
+      "objective/train/weight_avg": 0.9974696040153503,
+      "objective/train/weighted_lm_loss": 4.853564262390137,
+      "objective/train/weights_max": 1.0277366638183594,
+      "objective/train/weights_min": 0.9056559801101685,
+      "theoretical_loss": 4.10401016644798,
+      "tokens_seen": 327680000
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 0.0008777903897086645,
+      "loss": 2.4856,
+      "theoretical_loss": 4.051065245936996,
+      "tokens_seen": 367001600
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.0008588724933787363,
+      "loss": 2.4156,
+      "theoretical_loss": 3.9908001978004064,
+      "tokens_seen": 419430400
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.0008399545970488081,
+      "loss": 2.3438,
+      "theoretical_loss": 3.939481097700623,
+      "tokens_seen": 471859200
+    },
+    {
+      "epoch": 0.18,
+      "objective/train/advantage_avg": 0.01378590613603592,
+      "objective/train/docs_used": 287192,
+      "objective/train/instantaneous_batch_size": 32,
+      "objective/train/instantaneous_microbatch_size": 32768,
+      "objective/train/lm_loss": 4.726146221160889,
+      "objective/train/original_loss": 4.726146697998047,
+      "objective/train/theoretical_loss": 3.9220858822757396,
+      "objective/train/tokens_used": 511980000,
+      "objective/train/value_avg": -0.0184326171875,
+      "objective/train/value_loss": 0.0008915589423850179,
+      "objective/train/value_max": -0.0024433135986328125,
+      "objective/train/value_min": -0.169189453125,
+      "objective/train/value_reward_corr": 0.034202289497960975,
+      "objective/train/value_std": 0.01378631591796875,
+      "objective/train/weight_avg": 1.0013829469680786,
+      "objective/train/weighted_lm_loss": 4.732492446899414,
+      "objective/train/weights_max": 1.0164865255355835,
+      "objective/train/weights_min": 0.913129985332489,
+      "theoretical_loss": 3.9220858822757396,
+      "tokens_seen": 491520000
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.0008210367007188801,
+      "loss": 2.2548,
+      "theoretical_loss": 3.8949869551339704,
+      "tokens_seen": 524288000
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.000802118804388952,
+      "loss": 2.197,
+      "theoretical_loss": 3.855852403938689,
+      "tokens_seen": 576716800
+    },
+    {
+      "epoch": 0.22,
+      "learning_rate": 0.0007832009080590239,
+      "loss": 2.1036,
+      "theoretical_loss": 3.8210259233045254,
+      "tokens_seen": 629145600
+    },
+    {
+      "debugging/Self-BLEU-5": 0.5265375629586004,
+      "debugging/distinct-1-grams": 0.7435820408094715,
+      "debugging/distinct-2-grams": 0.9558103821233092,
+      "debugging/entropy-1-grams": 5.931434510687563,
+      "debugging/entropy-2-grams": 6.886416755326388,
+      "debugging/length": 521.9230769230769,
+      "debugging/num_segments": 13,
+      "debugging/raw_token_scores_avg": 0.022742915898561478,
+      "debugging/raw_token_scores_std": 0.07841178774833679,
+      "epoch": 0.23,
+      "objective/train/advantage_avg": 0.008544832468032837,
+      "objective/train/docs_used": 379091,
+      "objective/train/instantaneous_batch_size": 32,
+      "objective/train/instantaneous_microbatch_size": 32768,
+      "objective/train/lm_loss": 4.2809929847717285,
+      "objective/train/original_loss": 4.280993461608887,
+      "objective/train/theoretical_loss": 3.804976960695429,
+      "objective/train/tokens_used": 675820000,
+      "objective/train/value_avg": -0.031280517578125,
+      "objective/train/value_loss": 0.004552943632006645,
+      "objective/train/value_max": -0.0027370452880859375,
+      "objective/train/value_min": -0.306396484375,
+      "objective/train/value_reward_corr": 0.5422745268556184,
+      "objective/train/value_std": 0.03082275390625,
+      "objective/train/weight_avg": 1.000877022743225,
+      "objective/train/weighted_lm_loss": 4.284684181213379,
+      "objective/train/weights_max": 1.026648759841919,
+      "objective/train/weights_min": 0.9131191968917847,
+      "theoretical_loss": 3.804976960695429,
+      "tokens_seen": 655360000
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 0.0007642830117290957,
+      "loss": 2.0198,
+      "theoretical_loss": 3.7897293654583164,
+      "tokens_seen": 681574400
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 0.0007453651153991677,
+      "loss": 1.9114,
+      "theoretical_loss": 3.7613719997526367,
+      "tokens_seen": 734003200
+    },
+    {
+      "epoch": 0.28,
+      "learning_rate": 0.0007264472190692395,
+      "loss": 1.8379,
+      "theoretical_loss": 3.735495625147548,
+      "tokens_seen": 786432000
+    },
+    {
+      "epoch": 0.29,
+      "objective/train/advantage_avg": 0.005197666119784117,
+      "objective/train/docs_used": 471128,
+      "objective/train/instantaneous_batch_size": 32,
+      "objective/train/instantaneous_microbatch_size": 32768,
+      "objective/train/lm_loss": 3.469724655151367,
+      "objective/train/original_loss": 3.469724655151367,
+      "objective/train/theoretical_loss": 3.7204187214233073,
+      "objective/train/tokens_used": 839660000,
+      "objective/train/value_avg": -0.01427459716796875,
+      "objective/train/value_loss": 0.0032349335961043835,
+      "objective/train/value_max": -0.0009217262268066406,
+      "objective/train/value_min": -0.329833984375,
+      "objective/train/value_reward_corr": 0.23257723024354376,
+      "objective/train/value_std": 0.011383056640625,
+      "objective/train/weight_avg": 1.000535488128662,
+      "objective/train/weighted_lm_loss": 3.472010374069214,
+      "objective/train/weights_max": 1.0184398889541626,
+      "objective/train/weights_min": 0.9076024293899536,
+      "theoretical_loss": 3.7204187214233073,
+      "tokens_seen": 819200000
+    },
+    {
+      "epoch": 0.3,
+      "learning_rate": 0.0007075293227393113,
+      "loss": 1.7735,
+      "theoretical_loss": 3.7117382474521436,
+      "tokens_seen": 838860800
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 0.0006886114264093834,
+      "loss": 1.7242,
+      "theoretical_loss": 3.689809300987042,
+      "tokens_seen": 891289600
+    },
+    {
+      "epoch": 0.34,
+      "learning_rate": 0.0006696935300794552,
+      "loss": 1.6889,
+      "theoretical_loss": 3.6694722975957066,
+      "tokens_seen": 943718400
+    },
+    {
+      "debugging/Self-BLEU-5": 0.4286046663919377,
+      "debugging/distinct-1-grams": 0.8147567798871364,
+      "debugging/distinct-2-grams": 0.9823269374342457,
+      "debugging/entropy-1-grams": 6.1671920556004824,
+      "debugging/entropy-2-grams": 6.947028138756313,
+      "debugging/length": 477.53333333333336,
+      "debugging/num_segments": 15,
+      "debugging/raw_token_scores_avg": 0.020611366257071495,
+      "debugging/raw_token_scores_std": 0.08496682345867157,
+      "epoch": 0.35,
+      "objective/train/advantage_avg": -0.002937063341960311,
+      "objective/train/docs_used": 560408,
+      "objective/train/instantaneous_batch_size": 32,
+      "objective/train/instantaneous_microbatch_size": 32768,
+      "objective/train/lm_loss": 3.587904214859009,
+      "objective/train/original_loss": 3.5879039764404297,
+      "objective/train/theoretical_loss": 3.6551457544283386,
+      "objective/train/tokens_used": 1003500000,
+      "objective/train/value_avg": -0.0176849365234375,
+      "objective/train/value_loss": 0.006396747659891844,
+      "objective/train/value_max": -0.0007319450378417969,
+      "objective/train/value_min": -0.62451171875,
+      "objective/train/value_reward_corr": 0.3547212443715962,
+      "objective/train/value_std": 0.0216827392578125,
+      "objective/train/weight_avg": 0.9997376203536987,
+      "objective/train/weighted_lm_loss": 3.5868186950683594,
+      "objective/train/weights_max": 1.0237751007080078,
+      "objective/train/weights_min": 0.9077049493789673,
+      "theoretical_loss": 3.6551457544283386,
+      "tokens_seen": 983040000
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 0.0006507756337495271,
+      "loss": 1.7056,
+      "theoretical_loss": 3.6505323968108674,
+      "tokens_seen": 996147200
+    },
+    {
+      "epoch": 0.37,
+      "learning_rate": 0.000631857737419599,
+      "loss": 1.6782,
+      "theoretical_loss": 3.632827321456789,
+      "tokens_seen": 1048576000
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 0.0006129398410896708,
+      "loss": 1.6393,
+      "theoretical_loss": 3.616220599546101,
+      "tokens_seen": 1101004800
+    },
+    {
+      "epoch": 0.41,
+      "objective/train/advantage_avg": -0.004092915914952755,
+      "objective/train/docs_used": 649861,
+      "objective/train/instantaneous_batch_size": 32,
+      "objective/train/instantaneous_microbatch_size": 32768,
+      "objective/train/lm_loss": 3.148904323577881,
+      "objective/train/original_loss": 3.148904323577881,
+      "objective/train/theoretical_loss": 3.6024992663141386,
+      "objective/train/tokens_used": 1167340000,
+      "objective/train/value_avg": -0.0187835693359375,
+      "objective/train/value_loss": 0.007057450246065855,
+      "objective/train/value_max": -0.0004076957702636719,
+      "objective/train/value_min": -0.76025390625,
+      "objective/train/value_reward_corr": 0.5218380949829392,
+      "objective/train/value_std": 0.02960205078125,
+      "objective/train/weight_avg": 0.9996253848075867,
+      "objective/train/weighted_lm_loss": 3.1456782817840576,
+      "objective/train/weights_max": 1.0473830699920654,
+      "objective/train/weights_min": 0.9076167345046997,
+      "theoretical_loss": 3.6024992663141386,
+      "tokens_seen": 1146880000
+    },
+    {
+      "epoch": 0.41,
+      "learning_rate": 0.0005940219447597427,
+      "loss": 1.6315,
+      "theoretical_loss": 3.6005964566275575,
+      "tokens_seen": 1153433600
+    },
+    {
+      "epoch": 0.43,
+      "learning_rate": 0.0005751040484298145,
+      "loss": 1.6112,
+      "theoretical_loss": 3.585855900316411,
+      "tokens_seen": 1205862400
+    },
+    {
+      "epoch": 0.45,
+      "learning_rate": 0.0005561861520998866,
+      "loss": 1.5989,
+      "theoretical_loss": 3.571913680014217,
+      "tokens_seen": 1258291200
+    },
+    {
+      "debugging/Self-BLEU-5": 0.49020908264157476,
+      "debugging/distinct-1-grams": 0.768901113497886,
+      "debugging/distinct-2-grams": 0.9428782333551957,
+      "debugging/entropy-1-grams": 6.085999550681761,
+      "debugging/entropy-2-grams": 7.0033060167714964,
+      "debugging/length": 490.2352941176471,
+      "debugging/num_segments": 17,
+      "debugging/raw_token_scores_avg": 0.02056093141436577,
+      "debugging/raw_token_scores_std": 0.10981010645627975,
+      "epoch": 0.47,
+      "objective/train/advantage_avg": -0.0010009908583015203,
+      "objective/train/docs_used": 741674,
+      "objective/train/instantaneous_batch_size": 32,
+      "objective/train/instantaneous_microbatch_size": 32768,
+      "objective/train/lm_loss": 3.083587408065796,
+      "objective/train/original_loss": 3.083587646484375,
+      "objective/train/theoretical_loss": 3.5586958985729016,
+      "objective/train/tokens_used": 1331180000,
+      "objective/train/value_avg": -0.0195770263671875,
+      "objective/train/value_loss": 0.007854425348341465,
+      "objective/train/value_max": -0.0004239082336425781,
+      "objective/train/value_min": -0.97705078125,
+      "objective/train/value_reward_corr": 0.5932239490084845,
+      "objective/train/value_std": 0.0595703125,
+      "objective/train/weight_avg": 0.9999384880065918,
+      "objective/train/weighted_lm_loss": 3.082087516784668,
+      "objective/train/weights_max": 1.0919088125228882,
+      "objective/train/weights_min": 0.9060803055763245,
+      "theoretical_loss": 3.5586958985729016,
+      "tokens_seen": 1310720000
+    },
+    {
+      "epoch": 0.47,
+      "learning_rate": 0.0005372682557699584,
+      "loss": 1.5642,
+      "theoretical_loss": 3.5586958985729016,
+      "tokens_seen": 1310720000
+    },
+    {
+      "epoch": 0.49,
+      "learning_rate": 0.0005183503594400303,
+      "loss": 1.5616,
+      "theoretical_loss": 3.5461381161006846,
+      "tokens_seen": 1363148800
+    }
+  ],
+  "max_steps": 2670,
+  "num_train_epochs": 9223372036854775807,
+  "total_flos": 7.138646384411935e+17,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-1334/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:06649e6d12b7a4cd09a0a8cebe5d368d597d5443538b90e48182d55ad8737e4e
+size 3451

checkpoint-1334/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "_name_or_path": "gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMAndValueHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": true,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.23.0",
+  "use_cache": true,
+  "vocab_size": 50257
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:18526a5d91cde5bfc2d5b87457cf22c4434382b1dffb1425c83b153e956c3c94
+size 510401409

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "pad_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "add_prefix_space": false,
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 1024,
+  "name_or_path": "gpt2",
+  "special_tokens_map_file": null,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:06649e6d12b7a4cd09a0a8cebe5d368d597d5443538b90e48182d55ad8737e4e
+size 3451

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff