Mila committed on
Commit
3139db4
1 Parent(s): 33e257e

This time for sure x4

Files changed (39)
  1. app_context.py +253 -257
  2. flan-t5-train.py +234 -301
  3. results/checkpoint-16000/added_tokens.json +102 -0
  4. results/checkpoint-16000/config.json +62 -0
  5. results/checkpoint-16000/generation_config.json +6 -0
  6. results/checkpoint-16000/model.safetensors +3 -0
  7. results/checkpoint-16000/optimizer.pt +3 -0
  8. results/checkpoint-16000/rng_state.pth +3 -0
  9. results/checkpoint-16000/scheduler.pt +3 -0
  10. results/checkpoint-16000/special_tokens_map.json +125 -0
  11. results/checkpoint-16000/spiece.model +3 -0
  12. results/checkpoint-16000/tokenizer_config.json +939 -0
  13. results/checkpoint-16000/trainer_state.json +319 -0
  14. results/checkpoint-16000/training_args.bin +3 -0
  15. results/checkpoint-16500/added_tokens.json +102 -0
  16. results/checkpoint-16500/config.json +62 -0
  17. results/checkpoint-16500/generation_config.json +6 -0
  18. results/checkpoint-16500/model.safetensors +3 -0
  19. results/checkpoint-16500/optimizer.pt +3 -0
  20. results/checkpoint-16500/rng_state.pth +3 -0
  21. results/checkpoint-16500/scheduler.pt +3 -0
  22. results/checkpoint-16500/special_tokens_map.json +125 -0
  23. results/checkpoint-16500/spiece.model +3 -0
  24. results/checkpoint-16500/tokenizer_config.json +939 -0
  25. results/checkpoint-16500/trainer_state.json +325 -0
  26. results/checkpoint-16500/training_args.bin +3 -0
  27. results/checkpoint-17000/added_tokens.json +102 -0
  28. results/checkpoint-17000/config.json +62 -0
  29. results/checkpoint-17000/generation_config.json +6 -0
  30. results/checkpoint-17000/model.safetensors +3 -0
  31. results/checkpoint-17000/optimizer.pt +3 -0
  32. results/checkpoint-17000/rng_state.pth +3 -0
  33. results/checkpoint-17000/scheduler.pt +3 -0
  34. results/checkpoint-17000/special_tokens_map.json +125 -0
  35. results/checkpoint-17000/spiece.model +3 -0
  36. results/checkpoint-17000/tokenizer_config.json +939 -0
  37. results/checkpoint-17000/trainer_state.json +331 -0
  38. results/checkpoint-17000/training_args.bin +3 -0
  39. word_embedding.py +619 -0
app_context.py CHANGED
@@ -1,258 +1,254 @@
- import gradio as gr
- import math
- import spacy
- from datasets import load_dataset
- from sentence_transformers import SentenceTransformer
- from sentence_transformers import InputExample
- from sentence_transformers import losses
- from sentence_transformers import util
- from transformers import pipeline, T5Tokenizer
- from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
- from transformers import TrainingArguments, Trainer, T5ForConditionalGeneration
- import torch
- import torch.nn.functional as F
- from torch.utils.data import DataLoader
- import numpy as np
- import evaluate
- import nltk
- from nltk.corpus import stopwords
- import subprocess
- import sys
- import random
- from textwrap import fill
-
- # !pip install https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl
- subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl'])
- # tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
- model_base = "results/checkpoint-17000"
- nltk.download('stopwords')
- nlp = spacy.load("en_core_web_sm")
- stops = stopwords.words("english")
- ROMAN_CONSTANTS = (
-     ( "", "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX" ),
-     ( "", "X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC" ),
-     ( "", "C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM" ),
-     ( "", "M", "MM", "MMM", "", "", "-", "", "", "" ),
-     ( "", "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix" ),
-     ( "", "x", "xx", "xxx", "xl", "l", "lx", "lxx", "lxxx", "xc" ),
-     ( "", "c", "cc", "ccc", "cd", "d", "dc", "dcc", "dccc", "cm" ),
-     ( "", "m", "mm", "mmm", "", "", "-", "", "", "" ),
- )
-
- # answer = "Pizza"
- guesses = []
- return_guesses = []
- answer = "Moon"
- word1 = "Black"
- word2 = "White"
- word3 = "Sun"
- base_prompts = ["Sun is to Moon as ", "Black is to White as ", "Atom is to Element as",
-                 "Athens is to Greece as ", "Cat is to Dog as ", "Robin is to Bird as",
-                 "Hunger is to Ambition as "]
-
-
- #Mean Pooling - Take attention mask into account for correct averaging
- def mean_pooling(model_output, attention_mask):
-     token_embeddings = model_output['token_embeddings'] #First element of model_output contains all token embeddings
-     input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
-     return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
-
-
- def normalize(comment, lowercase, remove_stopwords):
-     if lowercase:
-         comment = comment.lower()
-     comment = nlp(comment)
-     lemmatized = list()
-     for word in comment:
-         lemma = word.lemma_.strip()
-         if lemma:
-             if not remove_stopwords or (remove_stopwords and lemma not in stops):
-                 lemmatized.append(lemma)
-     return " ".join(lemmatized)
-
-
- # def tokenize_function(examples):
- #     return tokenizer(examples["text"])
-
-
- def compute_metrics(eval_pred):
-     logits, labels = eval_pred
-     predictions = np.argmax(logits, axis=-1)
-     metric = evaluate.load("accuracy")
-     return metric.compute(predictions=predictions, references=labels)
-
-
- def get_model():
-     global model_base
-     # last_checkpoint = "./results/checkpoint-22500"
-
-     finetuned_model = T5ForConditionalGeneration.from_pretrained(model_base)
-     tokenizer = T5Tokenizer.from_pretrained(model_base)
-     # model = SentenceTransformer(model_base)
-     gpu_available = torch.cuda.is_available()
-     device = torch.device("cuda" if gpu_available else "cpu")
-     finetuned_model = finetuned_model.to(device)
-     return finetuned_model, tokenizer
-
-
- def cosine_scores(model, sentence):
-     global word1
-     global word2
-     global word3
-     # sentence1 = f"{word1} is to {word2} as"
-     embeddings1 = model.encode(sentence, convert_to_tensor=True)
-
- def embeddings(model, sentences, tokenizer):
-     global word1
-     global word2
-     global word3
-     global model_base
-     gpu_available = torch.cuda.is_available()
-     device = torch.device("cuda" if gpu_available else "cpu")
-     # device = torch.device('cuda:0')
-     # embeddings = model.encode(sentences)
-     question = "Please answer to this question: " + sentences
-
-     inputs = tokenizer(question, return_tensors="pt")
-
-     print(inputs)
-     # print(inputs.device)
-     print(model.device)
-     print(inputs['input_ids'].device)
-     print(inputs['attention_mask'].device)
-
-     inputs['attention_mask'] = inputs['attention_mask'].to(device)
-     inputs['input_ids'] = inputs['input_ids'].to(device)
-
-     outputs = model.generate(**inputs)
-     answer = tokenizer.decode(outputs[0])
-     answer = answer[6:-4]
-     # print(fill(answer, width=80))
-
-     print("ANSWER IS", answer)
-
-     return answer
-
-
- def random_word(model, tokenizer):
-     global model_base
-     vocab = tokenizer.get_vocab()
-     # with open(model_base + '/vocab.txt', 'r') as file:
-     line = ""
-     # content = file.readlines()
-     length = tokenizer.vocab_size
-     # print(vocab)
-     while line == "":
-         rand_line = random.randrange(0, length)
-         # print("TRYING TO FIND", rand_line, "OUT OF", length, "WITH VOCAB OF TYPE", type(vocab))
-         for word, id in vocab.items():
-             if id == rand_line and word[0].isalpha() and word not in stops and word not in ROMAN_CONSTANTS:
-                 # if vocab[rand_line][0].isalpha() and vocab[rand_line][:-1] not in stops and vocab[rand_line][:-1] not in ROMAN_CONSTANTS:
-                 line = word
-             elif id == rand_line:
-                 print(f"{word} is not alpha or is a stop word")
-     # for num, aline in enumerate(file, 1997):
-     #     if random.randrange(num) and aline.isalpha():
-     #         continue
-     #     # elif not aline.isalpha():
-
-     #     line = aline
-     print(line)
-     return line
-
-
- def generate_prompt(model, tokenizer):
-     global word1
-     global word2
-     global word3
-     global answer
-     global base_prompts
-     word1 = random_word(model, tokenizer)
-     # word2 = random_word()
-
-     word2 = embeddings(model, f"{base_prompts[random.randint(0, len(base_prompts) - 1)]}{word1} is to ___.", tokenizer)
-     word3 = random_word(model, tokenizer)
-     sentence = f"{word1} is to {word2} as {word3} is to ___."
-     print(sentence)
-     answer = embeddings(model, sentence, tokenizer)
-     print("ANSWER IS", answer)
-     return f"# {word1} is to {word2} as {word3} is to ___."
-     # cosine_scores(model, sentence)
-
-
- def greet(name):
-     return "Hello " + name + "!!"
-
- def check_answer(guess:str):
-     global guesses
-     global answer
-     global return_guesses
-     global word1
-     global word2
-     global word3
-
-     model, tokenizer = get_model()
-     output = ""
-     protected_guess = guess
-     sentence = f"{word1} is to {word2} as [MASK] is to {guess}."
-
-     other_word = embeddings(model, sentence, tokenizer)
-     guesses.append(guess)
-
-
-
-     for guess in return_guesses:
-         output += ("- " + guess + "<br>")
-
-     # output = output[:-1]
-     prompt = f"{word1} is to {word2} as {word3} is to ___."
-     # print("IS", protected_guess, "EQUAL TO", answer, ":", protected_guess.lower() == answer.lower())
-
-     if protected_guess.lower() == answer.lower():
-         return_guesses.append(f"{protected_guess}: {word1} is to {word2} as {word3} is to {protected_guess}.")
-         output += f"<span style='color:green'>- {return_guesses[-1]}</span><br>"
-         new_prompt = generate_prompt(model, tokenizer)
-         return new_prompt, "Correct!", output
-     else:
-         return_guess = f"{protected_guess}: {word1} is to {word2} as {other_word} is to {protected_guess}."
-         return_guesses.append(return_guess)
-         output += ("- " + return_guess + " <br>")
-         return prompt, "Try again!", output
-
- def main():
-     global word1
-     global word2
-     global word3
-     global answer
-     # answer = "Moon"
-     global guesses
-
-
-     # num_rows, data_type, value, example, embeddings = training()
-     # sent_embeddings = embeddings()
-     model, tokenizer = get_model()
-     generate_prompt(model, tokenizer)
-
-     prompt = f"{word1} is to {word2} as {word3} is to ____"
-     print(prompt)
-     print("TESTING EMBEDDINGS")
-     with gr.Blocks() as iface:
-         mark_question = gr.Markdown(prompt)
-         with gr.Tab("Guess"):
-             text_input = gr.Textbox()
-             text_output = gr.Textbox()
-             text_button = gr.Button("Submit")
-         with gr.Accordion("Open for previous guesses"):
-             text_guesses = gr.Markdown()
-         # with gr.Tab("Testing"):
-         #     gr.Markdown(f"""The Embeddings are {sent_embeddings}.""")
-         text_button.click(check_answer, inputs=[text_input], outputs=[mark_question, text_output, text_guesses])
-     # iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-     iface.launch()
-
-
-
-
-
- if __name__ == "__main__":
+ import gradio as gr
+ import math
+ import spacy
+ from datasets import load_dataset
+ from transformers import pipeline, T5Tokenizer
+ from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
+ from transformers import TrainingArguments, Trainer, T5ForConditionalGeneration
+ import torch
+ import torch.nn.functional as F
+ from torch.utils.data import DataLoader
+ import numpy as np
+ import evaluate
+ import nltk
+ from nltk.corpus import stopwords
+ import subprocess
+ import sys
+ import random
+ from textwrap import fill
+
+ # !pip install https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl
+ subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl'])
+ # tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
+ model_base = "results/checkpoint-17000"
+ nltk.download('stopwords')
+ nlp = spacy.load("en_core_web_sm")
+ stops = stopwords.words("english")
+ ROMAN_CONSTANTS = (
+     ( "", "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX" ),
+     ( "", "X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC" ),
+     ( "", "C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM" ),
+     ( "", "M", "MM", "MMM", "", "", "-", "", "", "" ),
+     ( "", "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix" ),
+     ( "", "x", "xx", "xxx", "xl", "l", "lx", "lxx", "lxxx", "xc" ),
+     ( "", "c", "cc", "ccc", "cd", "d", "dc", "dcc", "dccc", "cm" ),
+     ( "", "m", "mm", "mmm", "", "", "-", "", "", "" ),
+ )
+
+ # answer = "Pizza"
+ guesses = []
+ return_guesses = []
+ answer = "Moon"
+ word1 = "Black"
+ word2 = "White"
+ word3 = "Sun"
+ base_prompts = ["Sun is to Moon as ", "Black is to White as ", "Atom is to Element as",
+                 "Athens is to Greece as ", "Cat is to Dog as ", "Robin is to Bird as",
+                 "Hunger is to Ambition as "]
+
+
+ #Mean Pooling - Take attention mask into account for correct averaging
+ def mean_pooling(model_output, attention_mask):
+     token_embeddings = model_output['token_embeddings'] #First element of model_output contains all token embeddings
+     input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+     return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
+
+
+ def normalize(comment, lowercase, remove_stopwords):
+     if lowercase:
+         comment = comment.lower()
+     comment = nlp(comment)
+     lemmatized = list()
+     for word in comment:
+         lemma = word.lemma_.strip()
+         if lemma:
+             if not remove_stopwords or (remove_stopwords and lemma not in stops):
+                 lemmatized.append(lemma)
+     return " ".join(lemmatized)
+
+
+ # def tokenize_function(examples):
+ #     return tokenizer(examples["text"])
+
+
+ def compute_metrics(eval_pred):
+     logits, labels = eval_pred
+     predictions = np.argmax(logits, axis=-1)
+     metric = evaluate.load("accuracy")
+     return metric.compute(predictions=predictions, references=labels)
+
+
+ def get_model():
+     global model_base
+     # last_checkpoint = "./results/checkpoint-22500"
+
+     finetuned_model = T5ForConditionalGeneration.from_pretrained(model_base)
+     tokenizer = T5Tokenizer.from_pretrained(model_base)
+     # model = SentenceTransformer(model_base)
+     gpu_available = torch.cuda.is_available()
+     device = torch.device("cuda" if gpu_available else "cpu")
+     finetuned_model = finetuned_model.to(device)
+     return finetuned_model, tokenizer
+
+
+ def cosine_scores(model, sentence):
+     global word1
+     global word2
+     global word3
+     # sentence1 = f"{word1} is to {word2} as"
+     embeddings1 = model.encode(sentence, convert_to_tensor=True)
+
+ def embeddings(model, sentences, tokenizer):
+     global word1
+     global word2
+     global word3
+     global model_base
+     gpu_available = torch.cuda.is_available()
+     device = torch.device("cuda" if gpu_available else "cpu")
+     # device = torch.device('cuda:0')
+     # embeddings = model.encode(sentences)
+     question = "Please answer to this question: " + sentences
+
+     inputs = tokenizer(question, return_tensors="pt")
+
+     print(inputs)
+     # print(inputs.device)
+     print(model.device)
+     print(inputs['input_ids'].device)
+     print(inputs['attention_mask'].device)
+
+     inputs['attention_mask'] = inputs['attention_mask'].to(device)
+     inputs['input_ids'] = inputs['input_ids'].to(device)
+
+     outputs = model.generate(**inputs)
+     answer = tokenizer.decode(outputs[0])
+     answer = answer[6:-4]
+     # print(fill(answer, width=80))
+
+     print("ANSWER IS", answer)
+
+     return answer
+
+
+ def random_word(model, tokenizer):
+     global model_base
+     vocab = tokenizer.get_vocab()
+     # with open(model_base + '/vocab.txt', 'r') as file:
+     line = ""
+     # content = file.readlines()
+     length = tokenizer.vocab_size
+     # print(vocab)
+     while line == "":
+         rand_line = random.randrange(0, length)
+         # print("TRYING TO FIND", rand_line, "OUT OF", length, "WITH VOCAB OF TYPE", type(vocab))
+         for word, id in vocab.items():
+             if id == rand_line and word[0].isalpha() and word not in stops and word not in ROMAN_CONSTANTS:
+                 # if vocab[rand_line][0].isalpha() and vocab[rand_line][:-1] not in stops and vocab[rand_line][:-1] not in ROMAN_CONSTANTS:
+                 line = word
+             elif id == rand_line:
+                 print(f"{word} is not alpha or is a stop word")
+     # for num, aline in enumerate(file, 1997):
+     #     if random.randrange(num) and aline.isalpha():
+     #         continue
+     #     # elif not aline.isalpha():
+
+     #     line = aline
+     print(line)
+     return line
+
+
+ def generate_prompt(model, tokenizer):
+     global word1
+     global word2
+     global word3
+     global answer
+     global base_prompts
+     word1 = random_word(model, tokenizer)
+     # word2 = random_word()
+
+     word2 = embeddings(model, f"{base_prompts[random.randint(0, len(base_prompts) - 1)]}{word1} is to ___.", tokenizer)
+     word3 = random_word(model, tokenizer)
+     sentence = f"{word1} is to {word2} as {word3} is to ___."
+     print(sentence)
+     answer = embeddings(model, sentence, tokenizer)
+     print("ANSWER IS", answer)
+     return f"# {word1} is to {word2} as {word3} is to ___."
+     # cosine_scores(model, sentence)
+
+
+ def greet(name):
+     return "Hello " + name + "!!"
+
+ def check_answer(guess:str):
+     global guesses
+     global answer
+     global return_guesses
+     global word1
+     global word2
+     global word3
+
+     model, tokenizer = get_model()
+     output = ""
+     protected_guess = guess
+     sentence = f"{word1} is to {word2} as [MASK] is to {guess}."
+
+     other_word = embeddings(model, sentence, tokenizer)
+     guesses.append(guess)
+
+
+
+     for guess in return_guesses:
+         output += ("- " + guess + "<br>")
+
+     # output = output[:-1]
+     prompt = f"{word1} is to {word2} as {word3} is to ___."
+     # print("IS", protected_guess, "EQUAL TO", answer, ":", protected_guess.lower() == answer.lower())
+
+     if protected_guess.lower() == answer.lower():
+         return_guesses.append(f"{protected_guess}: {word1} is to {word2} as {word3} is to {protected_guess}.")
+         output += f"<span style='color:green'>- {return_guesses[-1]}</span><br>"
+         new_prompt = generate_prompt(model, tokenizer)
+         return new_prompt, "Correct!", output
+     else:
+         return_guess = f"{protected_guess}: {word1} is to {word2} as {other_word} is to {protected_guess}."
+         return_guesses.append(return_guess)
+         output += ("- " + return_guess + " <br>")
+         return prompt, "Try again!", output
+
+ def main():
+     global word1
+     global word2
+     global word3
+     global answer
+     # answer = "Moon"
+     global guesses
+
+
+     # num_rows, data_type, value, example, embeddings = training()
+     # sent_embeddings = embeddings()
+     model, tokenizer = get_model()
+     generate_prompt(model, tokenizer)
+
+     prompt = f"{word1} is to {word2} as {word3} is to ____"
+     print(prompt)
+     print("TESTING EMBEDDINGS")
+     with gr.Blocks() as iface:
+         mark_question = gr.Markdown(prompt)
+         with gr.Tab("Guess"):
+             text_input = gr.Textbox()
+             text_output = gr.Textbox()
+             text_button = gr.Button("Submit")
+         with gr.Accordion("Open for previous guesses"):
+             text_guesses = gr.Markdown()
+         # with gr.Tab("Testing"):
+         #     gr.Markdown(f"""The Embeddings are {sent_embeddings}.""")
+         text_button.click(check_answer, inputs=[text_input], outputs=[mark_question, text_output, text_guesses])
+     # iface = gr.Interface(fn=greet, inputs="text", outputs="text")
+     iface.launch()
+
+
+
+
+
+ if __name__ == "__main__":
      main()
flan-t5-train.py CHANGED
@@ -1,302 +1,235 @@
- import gradio as gr
- import math
- from datasets import load_dataset
- from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
- from transformers import TrainingArguments, Trainer
- from transformers import T5Tokenizer, T5ForConditionalGeneration
- import torch
- import torch.nn.functional as F
- from torch.utils.data import DataLoader
- import numpy as np
- import evaluate
- import nltk
- from nltk.corpus import stopwords
- import subprocess
- import sys
- from transformers import T5Tokenizer, DataCollatorForSeq2Seq
- from transformers import T5ForConditionalGeneration, Seq2SeqTrainingArguments, Seq2SeqTrainer
- from transformers import DataCollatorWithPadding, DistilBertTokenizerFast
- from transformers import TrainingArguments
- from transformers import (
-     BertModel,
-     BertTokenizerFast,
-     Trainer,
-     EvalPrediction
- )
-
- # !pip install https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl
- # subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl'])
- # tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
- # data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
- # nltk.download('stopwords')
- # nlp = spacy.load("en_core_web_sm")
- # stops = stopwords.words("english")
- nltk.download("punkt", quiet=True)
- metric = evaluate.load("rouge")
-
- # Global Parameters
- L_RATE = 3e-4
- BATCH_SIZE = 8
- PER_DEVICE_EVAL_BATCH = 4
- WEIGHT_DECAY = 0.01
- SAVE_TOTAL_LIM = 3
- NUM_EPOCHS = 10
-
- # Set up training arguments
- training_args = Seq2SeqTrainingArguments(
-     output_dir="./results",
-     evaluation_strategy="epoch",
-     learning_rate=L_RATE,
-     per_device_train_batch_size=BATCH_SIZE,
-     per_device_eval_batch_size=PER_DEVICE_EVAL_BATCH,
-     weight_decay=WEIGHT_DECAY,
-     save_total_limit=SAVE_TOTAL_LIM,
-     num_train_epochs=NUM_EPOCHS,
-     predict_with_generate=True,
-     push_to_hub=False
- )
-
- model_id = "google/flan-t5-base"
- tokenizer = T5Tokenizer.from_pretrained(model_id)
- # tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
- # metric = evaluate.load("accuracy")
-
- def tokenize_function(examples):
-     return tokenizer(examples["stem"], padding="max_length", truncation=True)
-
-
- #Mean Pooling - Take attention mask into account for correct averaging
- def mean_pooling(model_output, attention_mask):
-     token_embeddings = model_output[0] #First element of model_output contains all token embeddings
-     input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
-     return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
-
-
- # def compute_metrics(eval_pred):
- #     logits, labels = eval_pred
- #     predictions = np.argmax(logits, axis=-1)
- #     metric = evaluate.load("accuracy")
- #     return metric.compute(predictions=predictions, references=labels)
-
- def compute_metrics(eval_preds):
-     preds, labels = eval_preds
-
-     # decode preds and labels
-     labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
-     decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
-     decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
-
-     # rougeLSum expects newline after each sentence
-     decoded_preds = ["\n".join(nltk.sent_tokenize(pred.strip())) for pred in decoded_preds]
-     decoded_labels = ["\n".join(nltk.sent_tokenize(label.strip())) for label in decoded_labels]
-
-     result = metric.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)
-
-     return result
-
-
- def training():
-     dataset_id = "tomasmcz/word2vec_analogy"
-     # dataset_id = "relbert/scientific_and_creative_analogy"
-     # dataset_sub = "Quadruples_Kmiecik_random_split"
-     print("GETTING DATASET")
-     dataset = load_dataset(dataset_id)
-     # dataset = dataset["train"]
-     # tokenized_datasets = dataset.map(tokenize_function, batched=True)
-
-     print(dataset)
-     print(f"- The {dataset_id} dataset has {dataset['train'].num_rows} examples.")
-     print(f"- Each example is a {type(dataset['train'][0])} with a {type(dataset['train'][0])} as value.")
-     print(f"- Examples look like this: {dataset['train'][0]}")
-
-     # for i in dataset["train"]:
-     #     print(i["AB"], "to", i["CD"], "is", i["label"])
-
-     dataset = dataset["train"].train_test_split(test_size=0.3)
-
-     # We prefix our tasks with "answer the question"
-     prefix = "Please answer this question: "
-
-     # Define the preprocessing function
-
-     # def preprocess_function(examples):
-     #     """Add prefix to the sentences, tokenize the text, and set the labels"""
-     #     # The "inputs" are the tokenized answer:
-     #     inputs = []
-     #     # print(examples)
-     #     # inputs = [prefix + doc for doc in examples["question"]]
-     #     for doc in examples['source']:
-     #         # print("THE DOC IS:", doc)
-     #         # print("THE DOC IS:", examples[i]['AB'], examples[i]['CD'], examples[i]['label'])
-     #         prompt = f"{prefix}map "
-     #         for item in doc:
-     #             prompt += f"{item}, and "
-     #         prompt = prompt[:-6]
-     #         inputs.append(prompt)
-     #     # inputs = [prefix + doc for doc in examples["question"]]
-     #     for indx, doc in enumerate(examples["target_random"]):
-     #         prompt = f" to "
-     #         for item in doc:
-     #             prompt += f"{item}, and "
-     #         prompt = prompt[:-6] + "."
-     #         inputs[indx] += prompt
-     #     model_inputs = tokenizer(inputs, max_length=128, truncation=True)
-
-     def preprocess_function(examples):
-         """Add prefix to the sentences, tokenize the text, and set the labels"""
-         # The "inputs" are the tokenized answer:
-         inputs = []
-         # print(examples)
-         # inputs = [prefix + doc for doc in examples["question"]]
-         for doc in examples['word_a']:
-             # print("THE DOC IS:", doc)
-             # print("THE DOC IS:", examples[i]['AB'], examples[i]['CD'], examples[i]['label'])
-             prompt = f"{prefix}{doc} is to "
-             inputs.append(prompt)
-         # inputs = [prefix + doc for doc in examples["question"]]
-         for indx, doc in enumerate(examples["word_b"]):
-             prompt = f"{doc} as "
-             inputs[indx] += prompt
-
-         for indx, doc in enumerate(examples["word_c"]):
-             prompt = f"{doc} is to ___."
-             inputs[indx] += prompt
-         model_inputs = tokenizer(inputs, max_length=128, truncation=True)
-
-         # print(examples["label"], type(examples["label"]))
-
-         # The "labels" are the tokenized outputs:
-         labels = tokenizer(text_target=examples["word_d"],
-                            max_length=512,
-                            truncation=True)
-
-         model_inputs["labels"] = labels["input_ids"]
-         return model_inputs
-
-
-
-     # Map the preprocessing function across our dataset
-     tokenized_dataset = dataset.map(preprocess_function, batched=True)
-     # train_examples = []
-     # train_data = dataset["test"]
-     # # For agility we only 1/2 of our available data
-     # n_examples = dataset["test"].num_rows // 2
-
-     # for i in range(n_examples):
-     #     example = train_data[i]
-     #     temp_word_1 = example["stem"][0]
-     #     temp_word_2 = example["stem"][1]
-     #     temp_word_3 = example["choice"][example["answer"]][0]
-     #     temp_word_4 = example["choice"][example["answer"]][1]
-     #     comp1 = f"{temp_word_1} to {temp_word_2}"
-     #     comp2 = f"{temp_word_3} to {temp_word_4}"
-     #     # example_opposite = dataset_clean[-(i)]
-     #     # print(example["text"])
-     #     train_examples.append(InputExample(texts=[comp1, comp2]))
-
-
-     # train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=25)
-
-     print("END DATALOADER")
-
-     # print(train_examples)
-
-     embeddings = finetune(tokenized_dataset)
-
-     return 0
-
-
- def finetune(dataset):
-     # model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=5)
-     # model_id = "sentence-transformers/all-MiniLM-L6-v2"
-     model_id = "google/flan-t5-base"
-     # model_id = "distilbert-base-uncased"
-     # tokenizer = DistilBertTokenizerFast.from_pretrained(model_id)
-     tokenizer = T5Tokenizer.from_pretrained(model_id)
-     model = T5ForConditionalGeneration.from_pretrained(model_id)
-     data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)
-     device = torch.device('cuda:0')
-     model = model.to(device)
-
-     # training_args = TrainingArguments(output_dir="test_trainer")
-
-     # USE THIS LINK
-     # https://huggingface.co/blog/how-to-train-sentence-transformers
-
-     # train_loss = losses.MegaBatchMarginLoss(model=model)
-     # ds_train, ds_valid = dataset.train_test_split(test_size=0.2, seed=42)
-
-     print("BEGIN FIT")
-
-     trainer = Seq2SeqTrainer(
-         model=model,
-         args=training_args,
-         train_dataset=dataset["train"],
-         eval_dataset=dataset["test"],
-         # evaluation_strategy="no"
-         tokenizer=tokenizer,
-         data_collator=data_collator,
-         compute_metrics=compute_metrics
-     )
-
-     # model.fit(train_objectives=[(train_dataloader, train_loss)], epochs=10)
-
-     trainer.train()
-
-     # model.save("flan-analogies")
-
-     # model.save_to_hub("smhavens/bert-base-analogies")
-     # accuracy = compute_metrics(eval, metric)
-     return 0
-
- def greet(name):
-     return "Hello " + name + "!!"
-
- def check_answer(guess:str):
-     global guesses
-     global answer
-     guesses.append(guess)
-     output = ""
-     for guess in guesses:
-         output += ("- " + guess + "\n")
-     output = output[:-1]
-
-     if guess.lower() == answer.lower():
-         return "Correct!", output
-     else:
-         return "Try again!", output
-
- def main():
-     print("BEGIN")
-     word1 = "Black"
-     word2 = "White"
-     word3 = "Sun"
-     global answer
-     answer = "Moon"
-     global guesses
-
-     training()
-
-     # prompt = f"{word1} is to {word2} as {word3} is to ____"
-     # with gr.Blocks() as iface:
-     #     gr.Markdown(prompt)
-     #     with gr.Tab("Guess"):
-     #         text_input = gr.Textbox()
-     #         text_output = gr.Textbox()
-     #         text_button = gr.Button("Submit")
-     #     with gr.Accordion("Open for previous guesses"):
-     #         text_guesses = gr.Textbox()
-     #     with gr.Tab("Testing"):
-     #         gr.Markdown(f"""Number of rows in dataset is {num_rows}, with each having type {data_type} and value {value}.
-     #         An example is {example}.
-     #         The Embeddings are {embeddings}.""")
-     #     text_button.click(check_answer, inputs=[text_input], outputs=[text_output, text_guesses])
-     #     # iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-     #     iface.launch()
-
-
-
-
-
- if __name__ == "__main__":
+ import gradio as gr
+ import math
+ from datasets import load_dataset
+ from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
+ from transformers import TrainingArguments, Trainer
+ from transformers import T5Tokenizer, T5ForConditionalGeneration
+ import torch
+ import torch.nn.functional as F
+ from torch.utils.data import DataLoader
+ import numpy as np
+ import evaluate
+ import nltk
+ from nltk.corpus import stopwords
+ import subprocess
+ import sys
+ from transformers import T5Tokenizer, DataCollatorForSeq2Seq
+ from transformers import T5ForConditionalGeneration, Seq2SeqTrainingArguments, Seq2SeqTrainer
+ from transformers import DataCollatorWithPadding, DistilBertTokenizerFast
+ from transformers import TrainingArguments
+ from transformers import (
+     BertModel,
+     BertTokenizerFast,
+     Trainer,
+     EvalPrediction
+ )
+
+ nltk.download("punkt", quiet=True)
+ metric = evaluate.load("rouge")
+
+ # Global Parameters
+ L_RATE = 3e-4
+ BATCH_SIZE = 8
+ PER_DEVICE_EVAL_BATCH = 4
+ WEIGHT_DECAY = 0.01
+ SAVE_TOTAL_LIM = 3
+ NUM_EPOCHS = 10
+
+ # Set up training arguments
+ training_args = Seq2SeqTrainingArguments(
+     output_dir="./results",
+     evaluation_strategy="epoch",
+     learning_rate=L_RATE,
+     per_device_train_batch_size=BATCH_SIZE,
+     per_device_eval_batch_size=PER_DEVICE_EVAL_BATCH,
+     weight_decay=WEIGHT_DECAY,
+     save_total_limit=SAVE_TOTAL_LIM,
+     num_train_epochs=NUM_EPOCHS,
+     predict_with_generate=True,
+     push_to_hub=False
+ )
+
+ model_id = "google/flan-t5-base"
+ tokenizer = T5Tokenizer.from_pretrained(model_id)
+ # tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
+ # metric = evaluate.load("accuracy")
+
+ def tokenize_function(examples):
+     return tokenizer(examples["stem"], padding="max_length", truncation=True)
+
+
+ #Mean Pooling - Take attention mask into account for correct averaging
+ def mean_pooling(model_output, attention_mask):
+     token_embeddings = model_output[0] #First element of model_output contains all token embeddings
+     input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+     return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
+
+
+ # def compute_metrics(eval_pred):
+ #     logits, labels = eval_pred
+ #     predictions = np.argmax(logits, axis=-1)
+ #     metric = evaluate.load("accuracy")
+ #     return metric.compute(predictions=predictions, references=labels)
+
+ def compute_metrics(eval_preds):
+     preds, labels = eval_preds
+
+     # decode preds and labels
+     labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
+     decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
+     decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
+
+     # rougeLSum expects newline after each sentence
+     decoded_preds = ["\n".join(nltk.sent_tokenize(pred.strip())) for pred in decoded_preds]
+     decoded_labels = ["\n".join(nltk.sent_tokenize(label.strip())) for label in decoded_labels]
+
+     result = metric.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)
+
+     return result
+
+
+ def training():
+     dataset_id = "tomasmcz/word2vec_analogy"
+     # dataset_id = "relbert/scientific_and_creative_analogy"
+     # dataset_sub = "Quadruples_Kmiecik_random_split"
+     print("GETTING DATASET")
+     dataset = load_dataset(dataset_id)
+     # dataset = dataset["train"]
+     # tokenized_datasets = dataset.map(tokenize_function, batched=True)
+
+     print(dataset)
+     print(f"- The {dataset_id} dataset has {dataset['train'].num_rows} examples.")
+     print(f"- Each example is a {type(dataset['train'][0])} with a {type(dataset['train'][0])} as value.")
+     print(f"- Examples look like this: {dataset['train'][0]}")
+
+     # for i in dataset["train"]:
+     #     print(i["AB"], "to", i["CD"], "is", i["label"])
+
+     dataset = dataset["train"].train_test_split(test_size=0.3)
+
+     # We prefix our tasks with "answer the question"
+     prefix = "Please answer this question: "
+
+
+     def preprocess_function(examples):
+         """Add prefix to the sentences, tokenize the text, and set the labels"""
+         # The "inputs" are the tokenized answer:
+         inputs = []
+         # print(examples)
+         # inputs = [prefix + doc for doc in examples["question"]]
+         for doc in examples['word_a']:
+             # print("THE DOC IS:", doc)
+             # print("THE DOC IS:", examples[i]['AB'], examples[i]['CD'], examples[i]['label'])
+             prompt = f"{prefix}{doc} is to "
+             inputs.append(prompt)
+         # inputs = [prefix + doc for doc in examples["question"]]
+         for indx, doc in enumerate(examples["word_b"]):
+             prompt = f"{doc} as "
+             inputs[indx] += prompt
+
+         for indx, doc in enumerate(examples["word_c"]):
+             prompt = f"{doc} is to ___."
+             inputs[indx] += prompt
+         model_inputs = tokenizer(inputs, max_length=128, truncation=True)
+
+         # print(examples["label"], type(examples["label"]))
+
+         # The "labels" are the tokenized outputs:
+         labels = tokenizer(text_target=examples["word_d"],
+                            max_length=512,
+                            truncation=True)
+
+         model_inputs["labels"] = labels["input_ids"]
+         return model_inputs
+
+
+
+     # Map the preprocessing function across our dataset
+     tokenized_dataset = dataset.map(preprocess_function, batched=True)
+
+     print("END DATALOADER")
+
+     # print(train_examples)
+
+     embeddings = finetune(tokenized_dataset)
+
+     return 0
+
+
+ def finetune(dataset):
+     # model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=5)
+     # model_id = "sentence-transformers/all-MiniLM-L6-v2"
+     model_id = "google/flan-t5-base"
+     # model_id = "distilbert-base-uncased"
+     # tokenizer = DistilBertTokenizerFast.from_pretrained(model_id)
+     tokenizer = T5Tokenizer.from_pretrained(model_id)
+     model = T5ForConditionalGeneration.from_pretrained(model_id)
+     data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)
+     device = torch.device('cuda:0')
+     model = model.to(device)
+
+     # training_args = TrainingArguments(output_dir="test_trainer")
+
+     # USE THIS LINK
+     # https://huggingface.co/blog/how-to-train-sentence-transformers
+
+     # train_loss = losses.MegaBatchMarginLoss(model=model)
+     # ds_train, ds_valid = dataset.train_test_split(test_size=0.2, seed=42)
+
+     print("BEGIN FIT")
+
+     trainer = Seq2SeqTrainer(
+         model=model,
+         args=training_args,
+         train_dataset=dataset["train"],
+         eval_dataset=dataset["test"],
+         # evaluation_strategy="no"
+         tokenizer=tokenizer,
+         data_collator=data_collator,
+         compute_metrics=compute_metrics
+     )
+
+     # model.fit(train_objectives=[(train_dataloader, train_loss)], epochs=10)
+
+     trainer.train()
+
+     # model.save("flan-analogies")
+
+     # model.save_to_hub("smhavens/bert-base-analogies")
+     # accuracy = compute_metrics(eval, metric)
+     return 0
+
+ def greet(name):
+     return "Hello " + name + "!!"
+
+ def check_answer(guess:str):
+     global guesses
+     global answer
+     guesses.append(guess)
+     output = ""
+     for guess in guesses:
+         output += ("- " + guess + "\n")
+     output = output[:-1]
+
+     if guess.lower() == answer.lower():
+         return "Correct!", output
+     else:
+         return "Try again!", output
+
+ def main():
+     print("BEGIN")
+     word1 = "Black"
+     word2 = "White"
+     word3 = "Sun"
+     global answer
+     answer = "Moon"
+     global guesses
+
+     training()
+
+
+
+
+
+ if __name__ == "__main__":
      main()
results/checkpoint-16000/added_tokens.json ADDED
@@ -0,0 +1,102 @@
+ {
+   "<extra_id_0>": 32099,
+   "<extra_id_10>": 32089,
+   "<extra_id_11>": 32088,
+   "<extra_id_12>": 32087,
+   "<extra_id_13>": 32086,
+   "<extra_id_14>": 32085,
+   "<extra_id_15>": 32084,
+   "<extra_id_16>": 32083,
+   "<extra_id_17>": 32082,
+   "<extra_id_18>": 32081,
+   "<extra_id_19>": 32080,
+   "<extra_id_1>": 32098,
+   "<extra_id_20>": 32079,
+   "<extra_id_21>": 32078,
+   "<extra_id_22>": 32077,
+   "<extra_id_23>": 32076,
+   "<extra_id_24>": 32075,
+   "<extra_id_25>": 32074,
+   "<extra_id_26>": 32073,
+   "<extra_id_27>": 32072,
+   "<extra_id_28>": 32071,
+   "<extra_id_29>": 32070,
+   "<extra_id_2>": 32097,
+   "<extra_id_30>": 32069,
+   "<extra_id_31>": 32068,
+   "<extra_id_32>": 32067,
+   "<extra_id_33>": 32066,
+   "<extra_id_34>": 32065,
+   "<extra_id_35>": 32064,
+   "<extra_id_36>": 32063,
+   "<extra_id_37>": 32062,
+   "<extra_id_38>": 32061,
+   "<extra_id_39>": 32060,
+   "<extra_id_3>": 32096,
+   "<extra_id_40>": 32059,
+   "<extra_id_41>": 32058,
+   "<extra_id_42>": 32057,
+   "<extra_id_43>": 32056,
+   "<extra_id_44>": 32055,
+   "<extra_id_45>": 32054,
+   "<extra_id_46>": 32053,
+   "<extra_id_47>": 32052,
+   "<extra_id_48>": 32051,
+   "<extra_id_49>": 32050,
+   "<extra_id_4>": 32095,
+   "<extra_id_50>": 32049,
+   "<extra_id_51>": 32048,
+   "<extra_id_52>": 32047,
+   "<extra_id_53>": 32046,
+   "<extra_id_54>": 32045,
+   "<extra_id_55>": 32044,
+   "<extra_id_56>": 32043,
+   "<extra_id_57>": 32042,
+   "<extra_id_58>": 32041,
+   "<extra_id_59>": 32040,
+   "<extra_id_5>": 32094,
+   "<extra_id_60>": 32039,
+   "<extra_id_61>": 32038,
+   "<extra_id_62>": 32037,
+   "<extra_id_63>": 32036,
+   "<extra_id_64>": 32035,
+   "<extra_id_65>": 32034,
+   "<extra_id_66>": 32033,
+   "<extra_id_67>": 32032,
+   "<extra_id_68>": 32031,
+   "<extra_id_69>": 32030,
+   "<extra_id_6>": 32093,
+   "<extra_id_70>": 32029,
+   "<extra_id_71>": 32028,
+   "<extra_id_72>": 32027,
+   "<extra_id_73>": 32026,
+   "<extra_id_74>": 32025,
+   "<extra_id_75>": 32024,
+   "<extra_id_76>": 32023,
+   "<extra_id_77>": 32022,
+   "<extra_id_78>": 32021,
+   "<extra_id_79>": 32020,
+   "<extra_id_7>": 32092,
+   "<extra_id_80>": 32019,
+   "<extra_id_81>": 32018,
+   "<extra_id_82>": 32017,
+   "<extra_id_83>": 32016,
+   "<extra_id_84>": 32015,
+   "<extra_id_85>": 32014,
+   "<extra_id_86>": 32013,
+   "<extra_id_87>": 32012,
+   "<extra_id_88>": 32011,
+   "<extra_id_89>": 32010,
+   "<extra_id_8>": 32091,
+   "<extra_id_90>": 32009,
+   "<extra_id_91>": 32008,
+   "<extra_id_92>": 32007,
+   "<extra_id_93>": 32006,
+   "<extra_id_94>": 32005,
+   "<extra_id_95>": 32004,
+   "<extra_id_96>": 32003,
+   "<extra_id_97>": 32002,
+   "<extra_id_98>": 32001,
+   "<extra_id_99>": 32000,
+   "<extra_id_9>": 32090
+ }
results/checkpoint-16000/config.json ADDED
@@ -0,0 +1,62 @@
+ {
+   "_name_or_path": "google/flan-t5-base",
+   "architectures": [
+     "T5ForConditionalGeneration"
+   ],
+   "classifier_dropout": 0.0,
+   "d_ff": 2048,
+   "d_kv": 64,
+   "d_model": 768,
+   "decoder_start_token_id": 0,
+   "dense_act_fn": "gelu_new",
+   "dropout_rate": 0.1,
+   "eos_token_id": 1,
+   "feed_forward_proj": "gated-gelu",
+   "initializer_factor": 1.0,
+   "is_encoder_decoder": true,
+   "is_gated_act": true,
+   "layer_norm_epsilon": 1e-06,
+   "model_type": "t5",
+   "n_positions": 512,
+   "num_decoder_layers": 12,
+   "num_heads": 12,
+   "num_layers": 12,
+   "output_past": true,
+   "pad_token_id": 0,
+   "relative_attention_max_distance": 128,
+   "relative_attention_num_buckets": 32,
+   "task_specific_params": {
+     "summarization": {
+       "early_stopping": true,
+       "length_penalty": 2.0,
+       "max_length": 200,
+       "min_length": 30,
+       "no_repeat_ngram_size": 3,
+       "num_beams": 4,
+       "prefix": "summarize: "
+     },
+     "translation_en_to_de": {
+       "early_stopping": true,
+       "max_length": 300,
+       "num_beams": 4,
+       "prefix": "translate English to German: "
+     },
+     "translation_en_to_fr": {
+       "early_stopping": true,
+       "max_length": 300,
+       "num_beams": 4,
+       "prefix": "translate English to French: "
+     },
+     "translation_en_to_ro": {
+       "early_stopping": true,
+       "max_length": 300,
+       "num_beams": 4,
+       "prefix": "translate English to Romanian: "
+     }
+   },
+   "tie_word_embeddings": false,
+   "torch_dtype": "float32",
+   "transformers_version": "4.35.2",
+   "use_cache": true,
+   "vocab_size": 32128
+ }
results/checkpoint-16000/generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "decoder_start_token_id": 0,
+   "eos_token_id": 1,
+   "pad_token_id": 0,
+   "transformers_version": "4.35.2"
+ }
results/checkpoint-16000/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cd7f96db75733e18d6af8488ab51eea991be641c6c22b24fa5ab3b45101c3398
+ size 990345064
results/checkpoint-16000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:31aa07bcfc63b03b9dbfb77536457e4d0591b64d537e2f4834f5b81c6bd2ab21
+ size 1980860410
results/checkpoint-16000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cc296e1811c88d4548bfa74b8cf96485e58c41652ba8a0db69b6e3a9762f9be0
+ size 14244
results/checkpoint-16000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8c77d751bb87ca04afd8f823ee9102cffea6221900b1a056c2f31d9044f1a0ce
+ size 1064
results/checkpoint-16000/special_tokens_map.json ADDED
@@ -0,0 +1,125 @@
+ {
+   "additional_special_tokens": [
+     "<extra_id_0>",
+     "<extra_id_1>",
+     "<extra_id_2>",
+     "<extra_id_3>",
+     "<extra_id_4>",
+     "<extra_id_5>",
+     "<extra_id_6>",
+     "<extra_id_7>",
+     "<extra_id_8>",
+     "<extra_id_9>",
+     "<extra_id_10>",
+     "<extra_id_11>",
+     "<extra_id_12>",
+     "<extra_id_13>",
+     "<extra_id_14>",
+     "<extra_id_15>",
+     "<extra_id_16>",
+     "<extra_id_17>",
+     "<extra_id_18>",
+     "<extra_id_19>",
+     "<extra_id_20>",
+     "<extra_id_21>",
+     "<extra_id_22>",
+     "<extra_id_23>",
+     "<extra_id_24>",
+     "<extra_id_25>",
+     "<extra_id_26>",
+     "<extra_id_27>",
+     "<extra_id_28>",
+     "<extra_id_29>",
+     "<extra_id_30>",
+     "<extra_id_31>",
+     "<extra_id_32>",
+     "<extra_id_33>",
+     "<extra_id_34>",
+     "<extra_id_35>",
+     "<extra_id_36>",
+     "<extra_id_37>",
+     "<extra_id_38>",
+     "<extra_id_39>",
+     "<extra_id_40>",
+     "<extra_id_41>",
+     "<extra_id_42>",
+     "<extra_id_43>",
+     "<extra_id_44>",
+     "<extra_id_45>",
+     "<extra_id_46>",
+     "<extra_id_47>",
+     "<extra_id_48>",
+     "<extra_id_49>",
+     "<extra_id_50>",
+     "<extra_id_51>",
+     "<extra_id_52>",
+     "<extra_id_53>",
+     "<extra_id_54>",
+     "<extra_id_55>",
+     "<extra_id_56>",
+     "<extra_id_57>",
+     "<extra_id_58>",
+     "<extra_id_59>",
+     "<extra_id_60>",
+     "<extra_id_61>",
+     "<extra_id_62>",
+     "<extra_id_63>",
+     "<extra_id_64>",
+     "<extra_id_65>",
+     "<extra_id_66>",
+     "<extra_id_67>",
+     "<extra_id_68>",
+     "<extra_id_69>",
+     "<extra_id_70>",
+     "<extra_id_71>",
+     "<extra_id_72>",
+     "<extra_id_73>",
+     "<extra_id_74>",
+     "<extra_id_75>",
+     "<extra_id_76>",
+     "<extra_id_77>",
+     "<extra_id_78>",
+     "<extra_id_79>",
+     "<extra_id_80>",
+     "<extra_id_81>",
+     "<extra_id_82>",
+     "<extra_id_83>",
+     "<extra_id_84>",
+     "<extra_id_85>",
+     "<extra_id_86>",
+     "<extra_id_87>",
+     "<extra_id_88>",
+     "<extra_id_89>",
+     "<extra_id_90>",
+     "<extra_id_91>",
+     "<extra_id_92>",
+     "<extra_id_93>",
+     "<extra_id_94>",
+     "<extra_id_95>",
+     "<extra_id_96>",
+     "<extra_id_97>",
+     "<extra_id_98>",
+     "<extra_id_99>"
+   ],
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
results/checkpoint-16000/spiece.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
+ size 791656
results/checkpoint-16000/tokenizer_config.json ADDED
@@ -0,0 +1,939 @@