vjt committed on
Commit
0f303b0
1 Parent(s): 43d7c98

Training in progress, epoch 1

.gitignore ADDED
@@ -0,0 +1 @@
1
+ checkpoint-*/
.ipynb_checkpoints/T5Train-checkpoint.ipynb ADDED
@@ -0,0 +1,561 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "3ef6a441",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "Requirement already satisfied: nltk in c:\\users\\vjmar\\documents\\1. code\\pythonenvs\\hf-env\\lib\\site-packages (3.8.1)\n",
14
+ "Requirement already satisfied: click in c:\\users\\vjmar\\documents\\1. code\\pythonenvs\\hf-env\\lib\\site-packages (from nltk) (8.1.3)\n",
15
+ "Requirement already satisfied: tqdm in c:\\users\\vjmar\\documents\\1. code\\pythonenvs\\hf-env\\lib\\site-packages (from nltk) (4.64.1)\n",
16
+ "Requirement already satisfied: joblib in c:\\users\\vjmar\\documents\\1. code\\pythonenvs\\hf-env\\lib\\site-packages (from nltk) (1.2.0)\n",
17
+ "Requirement already satisfied: regex>=2021.8.3 in c:\\users\\vjmar\\documents\\1. code\\pythonenvs\\hf-env\\lib\\site-packages (from nltk) (2022.10.31)\n",
18
+ "Requirement already satisfied: colorama in c:\\users\\vjmar\\documents\\1. code\\pythonenvs\\hf-env\\lib\\site-packages (from click->nltk) (0.4.6)\n",
19
+ "Requirement already satisfied: rouge_score in c:\\users\\vjmar\\documents\\1. code\\pythonenvs\\hf-env\\lib\\site-packages (0.1.2)\n",
20
+ "Requirement already satisfied: numpy in c:\\users\\vjmar\\documents\\1. code\\pythonenvs\\hf-env\\lib\\site-packages (from rouge_score) (1.24.1)\n",
21
+ "Requirement already satisfied: absl-py in c:\\users\\vjmar\\documents\\1. code\\pythonenvs\\hf-env\\lib\\site-packages (from rouge_score) (1.4.0)\n",
22
+ "Requirement already satisfied: six>=1.14.0 in c:\\users\\vjmar\\documents\\1. code\\pythonenvs\\hf-env\\lib\\site-packages (from rouge_score) (1.16.0)\n",
23
+ "Requirement already satisfied: nltk in c:\\users\\vjmar\\documents\\1. code\\pythonenvs\\hf-env\\lib\\site-packages (from rouge_score) (3.8.1)\n",
24
+ "Requirement already satisfied: joblib in c:\\users\\vjmar\\documents\\1. code\\pythonenvs\\hf-env\\lib\\site-packages (from nltk->rouge_score) (1.2.0)\n",
25
+ "Requirement already satisfied: tqdm in c:\\users\\vjmar\\documents\\1. code\\pythonenvs\\hf-env\\lib\\site-packages (from nltk->rouge_score) (4.64.1)\n",
26
+ "Requirement already satisfied: regex>=2021.8.3 in c:\\users\\vjmar\\documents\\1. code\\pythonenvs\\hf-env\\lib\\site-packages (from nltk->rouge_score) (2022.10.31)\n",
27
+ "Requirement already satisfied: click in c:\\users\\vjmar\\documents\\1. code\\pythonenvs\\hf-env\\lib\\site-packages (from nltk->rouge_score) (8.1.3)\n",
28
+ "Requirement already satisfied: colorama in c:\\users\\vjmar\\documents\\1. code\\pythonenvs\\hf-env\\lib\\site-packages (from click->nltk->rouge_score) (0.4.6)\n"
29
+ ]
30
+ }
31
+ ],
32
+ "source": [
33
+ "# !pip install transformers\n",
34
+ "!pip install nltk\n",
35
+ "!pip install rouge_score\n",
36
+ "\n",
37
+ "%load_ext autoreload\n",
38
+ "%autoreload 2"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": null,
44
+ "id": "845c8640",
45
+ "metadata": {},
46
+ "outputs": [],
47
+ "source": []
48
+ },
49
+ {
50
+ "cell_type": "code",
51
+ "execution_count": 2,
52
+ "id": "23e534d2",
53
+ "metadata": {},
54
+ "outputs": [
55
+ {
56
+ "name": "stderr",
57
+ "output_type": "stream",
58
+ "text": [
59
+ "C:\\Users\\vjmar\\Documents\\1. Code\\PythonEnvs\\hf-env\\lib\\site-packages\\tqdm\\auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
60
+ " from .autonotebook import tqdm as notebook_tqdm\n"
61
+ ]
62
+ },
63
+ {
64
+ "name": "stdout",
65
+ "output_type": "stream",
66
+ "text": [
67
+ "| ID | GPU | MEM |\n",
68
+ "------------------\n",
69
+ "| 0 | 5% | 13% |\n",
70
+ "None\n",
71
+ "---------------------------------------------------------------\n",
72
+ "Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.\n",
73
+ "Token is valid.\n",
74
+ "Your token has been saved to C:\\Users\\vjmar\\.cache\\huggingface\\token\n",
75
+ "Login successful\n"
76
+ ]
77
+ }
78
+ ],
79
+ "source": [
80
+ "import GPUtil\n",
81
+ "from huggingface_hub import HfApi, HfFolder, login\n",
82
+ "\n",
83
+ "print(GPUtil.showUtilization())\n",
84
+ "print(\"---------------------------------------------------------------\")\n",
85
+ "token = \"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\"\n",
86
+ "login(\"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\")\n",
87
+ "! git config --global credential.helper store"
88
+ ]
89
+ },
90
+ {
91
+ "cell_type": "code",
92
+ "execution_count": 3,
93
+ "id": "2b5a41be",
94
+ "metadata": {},
95
+ "outputs": [],
96
+ "source": [
97
+ "CKPT = 't5-base'\n",
98
+ "from transformers import AutoTokenizer, T5ForConditionalGeneration\n",
99
+ "model = T5ForConditionalGeneration.from_pretrained(CKPT)"
100
+ ]
101
+ },
102
+ {
103
+ "cell_type": "code",
104
+ "execution_count": 4,
105
+ "id": "75c5f40c",
106
+ "metadata": {},
107
+ "outputs": [
108
+ {
109
+ "name": "stderr",
110
+ "output_type": "stream",
111
+ "text": [
112
+ "C:\\Users\\vjmar\\Documents\\1. Code\\PythonEnvs\\hf-env\\lib\\site-packages\\transformers\\models\\t5\\tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
113
+ "For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
114
+ "- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.\n",
115
+ "- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
116
+ "- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
117
+ " warnings.warn(\n"
118
+ ]
119
+ }
120
+ ],
121
+ "source": [
122
+ "tokenizer = AutoTokenizer.from_pretrained(CKPT)"
123
+ ]
124
+ },
125
+ {
126
+ "cell_type": "markdown",
127
+ "id": "ca3c201b",
128
+ "metadata": {},
129
+ "source": [
130
+ "# Data"
131
+ ]
132
+ },
133
+ {
134
+ "cell_type": "code",
135
+ "execution_count": 5,
136
+ "id": "f9ab72e4",
137
+ "metadata": {},
138
+ "outputs": [
139
+ {
140
+ "name": "stderr",
141
+ "output_type": "stream",
142
+ "text": [
143
+ "Found cached dataset wikisql (C:/Users/vjmar/.cache/huggingface/datasets/wikisql/default/0.1.0/7037bfe6a42b1ca2b6ac3ccacba5253b1825d31379e9cc626fc79a620977252d)\n",
144
+ "Found cached dataset wikisql (C:/Users/vjmar/.cache/huggingface/datasets/wikisql/default/0.1.0/7037bfe6a42b1ca2b6ac3ccacba5253b1825d31379e9cc626fc79a620977252d)\n"
145
+ ]
146
+ }
147
+ ],
148
+ "source": [
149
+ "try:\n",
150
+ " from datasets import load_dataset\n",
151
+ "except ModuleNotFoundError:\n",
152
+ " !pip install datasets\n",
153
+ " from datasets import load_dataset\n",
154
+ "\n",
155
+ "train_data = load_dataset('wikisql', split='train+validation')\n",
156
+ "test_data = load_dataset('wikisql', split='test')"
157
+ ]
158
+ },
159
+ {
160
+ "cell_type": "code",
161
+ "execution_count": 6,
162
+ "id": "0e62f295",
163
+ "metadata": {},
164
+ "outputs": [
165
+ {
166
+ "name": "stderr",
167
+ "output_type": "stream",
168
+ "text": [
169
+ "Loading cached processed dataset at C:\\Users\\vjmar\\.cache\\huggingface\\datasets\\wikisql\\default\\0.1.0\\7037bfe6a42b1ca2b6ac3ccacba5253b1825d31379e9cc626fc79a620977252d\\cache-19a43a9806773ee1.arrow\n",
170
+ "Loading cached processed dataset at C:\\Users\\vjmar\\.cache\\huggingface\\datasets\\wikisql\\default\\0.1.0\\7037bfe6a42b1ca2b6ac3ccacba5253b1825d31379e9cc626fc79a620977252d\\cache-620e43f13a2f425c.arrow\n"
171
+ ]
172
+ }
173
+ ],
174
+ "source": [
175
+ "def format_dataset(example):\n",
176
+ " try:\n",
177
+ " condition:str = example['sql']['conds']['condition'][0]\n",
178
+ " except:\n",
179
+ " condition = \"\"\n",
180
+ " target = f\"{example['sql']['human_readable']}\"\n",
181
+ " \n",
182
+ " if condition.lower() in target.lower() and condition != \"\":\n",
183
+ " target = target.lower().replace(condition.lower(), f\"'{condition}'\")\n",
184
+ "\n",
185
+ " cols = \"\"\n",
186
+ " for item in example['table']['header']:\n",
187
+ " cols = cols + item.lower() + \", \"\n",
188
+ " \n",
189
+ "\n",
190
+ " obj = {'input': f\"translate to SQL: {example['question']} | table: {cols})\".replace(\", )\", \"\" ),\n",
191
+ " \"target\": target}\n",
192
+ " return obj\n",
193
+ "\n",
194
+ "# Apply Data Formatting\n",
195
+ "train_data = train_data.map(format_dataset, remove_columns=train_data.column_names)\n",
196
+ "test_data = test_data.map(format_dataset, remove_columns=test_data.column_names)"
197
+ ]
198
+ },
199
+ {
200
+ "cell_type": "code",
201
+ "execution_count": null,
202
+ "id": "e68f9896",
203
+ "metadata": {},
204
+ "outputs": [],
205
+ "source": []
206
+ },
207
+ {
208
+ "cell_type": "markdown",
209
+ "id": "f47e6cd6",
210
+ "metadata": {},
211
+ "source": [
212
+ "# Data Format for Training"
213
+ ]
214
+ },
215
+ {
216
+ "cell_type": "code",
217
+ "execution_count": 12,
218
+ "id": "15ec294c",
219
+ "metadata": {},
220
+ "outputs": [],
221
+ "source": [
222
+ "def map_to_length(x): # map article and summary len to dict as well as if sample is longer than 512 tokens\n",
223
+ " \n",
224
+ " # from transformers import AutoTokenizer \n",
225
+ " # tokenizer = AutoTokenizer.from_pretrained(\"t5-base\") \n",
226
+ " x[\"input_len\"] = len(tokenizer(x[\"input\"]).input_ids)\n",
227
+ " x[\"input_longer_256\"] = int(x[\"input_len\"] > 256)\n",
228
+ " x[\"input_longer_128\"] = int(x[\"input_len\"] > 128)\n",
229
+ " x[\"input_longer_64\"] = int(x[\"input_len\"] > 64)\n",
230
+ " x[\"out_len\"] = len(tokenizer(x[\"target\"]).input_ids)\n",
231
+ " x[\"out_longer_256\"] = int(x[\"out_len\"] > 256)\n",
232
+ " x[\"out_longer_128\"] = int(x[\"out_len\"] > 128)\n",
233
+ " x[\"out_longer_64\"] = int(x[\"out_len\"] > 64)\n",
234
+ " return x\n"
235
+ ]
236
+ },
237
+ {
238
+ "cell_type": "code",
239
+ "execution_count": 13,
240
+ "id": "7b5df2e4",
241
+ "metadata": {},
242
+ "outputs": [
243
+ {
244
+ "name": "stdout",
245
+ "output_type": "stream",
246
+ "text": [
247
+ "<class 'datasets.arrow_dataset.Dataset'>\n"
248
+ ]
249
+ },
250
+ {
251
+ "name": "stderr",
252
+ "output_type": "stream",
253
+ "text": [
254
+ "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10000/10000 [00:04<00:00, 2380.77ex/s]\n"
255
+ ]
256
+ }
257
+ ],
258
+ "source": [
259
+ "sample_size = 10000\n",
260
+ "print(type(train_data))\n",
261
+ "data_stats = train_data.select(range(sample_size)).map(map_to_length) #, num_proc=4"
262
+ ]
263
+ },
264
+ {
265
+ "cell_type": "code",
266
+ "execution_count": 14,
267
+ "id": "e4589f66",
268
+ "metadata": {},
269
+ "outputs": [
270
+ {
271
+ "name": "stderr",
272
+ "output_type": "stream",
273
+ "text": [
274
+ "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 24.68ba/s]\n",
275
+ "Loading cached processed dataset at C:\\Users\\vjmar\\.cache\\huggingface\\datasets\\wikisql\\default\\0.1.0\\7037bfe6a42b1ca2b6ac3ccacba5253b1825d31379e9cc626fc79a620977252d\\cache-aefcd3f1e400ed5a.arrow\n"
276
+ ]
277
+ },
278
+ {
279
+ "name": "stdout",
280
+ "output_type": "stream",
281
+ "text": [
282
+ "Input Mean: 46.515, %-Input > 256:0.0, %-Input > 128:0.0037, %-Input > 64:0.0712 Output Mean:19.1137, %-Output > 256:0.0, %-Output > 128:0.0002, %-Output > 64:0.0007\n"
283
+ ]
284
+ },
285
+ {
286
+ "name": "stderr",
287
+ "output_type": "stream",
288
+ "text": [
289
+ " 0%| | 0/16 [00:00<?, ?ba/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.\n",
290
+ "C:\\Users\\vjmar\\Documents\\1. Code\\PythonEnvs\\hf-env\\lib\\site-packages\\transformers\\tokenization_utils_base.py:2339: FutureWarning: The `pad_to_max_length` argument is deprecated and will be removed in a future version, use `padding=True` or `padding='longest'` to pad to the longest sequence in the batch, or use `padding='max_length'` to pad to a max length. In this case, you can give a specific length with `max_length` (e.g. `max_length=45`) or leave max_length to None to pad to the maximal input size of the model (e.g. 512 for Bert).\n",
291
+ " warnings.warn(\n",
292
+ "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:04<00:00, 3.88ba/s]\n"
293
+ ]
294
+ }
295
+ ],
296
+ "source": [
297
+ "def compute_and_print_stats(x):\n",
298
+ " if len(x[\"input_len\"]) == sample_size:\n",
299
+ " print(\n",
300
+ " \"Input Mean: {}, %-Input > 256:{}, %-Input > 128:{}, %-Input > 64:{} Output Mean:{}, %-Output > 256:{}, %-Output > 128:{}, %-Output > 64:{}\".format(\n",
301
+ " sum(x[\"input_len\"]) / sample_size,\n",
302
+ " sum(x[\"input_longer_256\"]) / sample_size,\n",
303
+ " sum(x[\"input_longer_128\"]) / sample_size,\n",
304
+ " sum(x[\"input_longer_64\"]) / sample_size, \n",
305
+ " sum(x[\"out_len\"]) / sample_size,\n",
306
+ " sum(x[\"out_longer_256\"]) / sample_size,\n",
307
+ " sum(x[\"out_longer_128\"]) / sample_size,\n",
308
+ " sum(x[\"out_longer_64\"]) / sample_size,\n",
309
+ " )\n",
310
+ " )\n",
311
+ "\n",
312
+ "output = data_stats.map(\n",
313
+ " compute_and_print_stats, \n",
314
+ " batched=True,\n",
315
+ " batch_size=-1,\n",
316
+ ")\n",
317
+ "\n",
318
+ "# tokenize the examples\n",
319
+ "def convert_to_features(example_batch):\n",
320
+ " input_encodings = tokenizer.batch_encode_plus(example_batch['input'], pad_to_max_length=True, max_length=64)\n",
321
+ " target_encodings = tokenizer.batch_encode_plus(example_batch['target'], pad_to_max_length=True, max_length=64)\n",
322
+ "\n",
323
+ " encodings = {\n",
324
+ " 'input_ids': input_encodings['input_ids'], \n",
325
+ " 'attention_mask': input_encodings['attention_mask'],\n",
326
+ " 'labels': target_encodings['input_ids'],\n",
327
+ " 'decoder_attention_mask': target_encodings['attention_mask']\n",
328
+ " }\n",
329
+ "\n",
330
+ " return encodings\n",
331
+ "\n",
332
+ "train_data = train_data.map(convert_to_features, batched=True, remove_columns=train_data.column_names)\n",
333
+ "test_data = test_data.map(convert_to_features, batched=True, remove_columns=test_data.column_names)\n",
334
+ "\n",
335
+ "columns = ['input_ids', 'attention_mask', 'labels', 'decoder_attention_mask']\n",
336
+ "\n",
337
+ "train_data.set_format(type='torch', columns=columns)\n",
338
+ "test_data.set_format(type='torch', columns=columns)"
339
+ ]
340
+ },
341
+ {
342
+ "cell_type": "markdown",
343
+ "id": "d439da79",
344
+ "metadata": {},
345
+ "source": [
346
+ "# Trainer"
347
+ ]
348
+ },
349
+ {
350
+ "cell_type": "code",
351
+ "execution_count": 15,
352
+ "id": "f1cee70c",
353
+ "metadata": {},
354
+ "outputs": [],
355
+ "source": [
356
+ "from transformers import Seq2SeqTrainer\n",
357
+ "from transformers import Seq2SeqTrainingArguments\n",
358
+ "import os\n",
359
+ "\n",
360
+ "training_args = Seq2SeqTrainingArguments(\n",
361
+ " output_dir=str(os.getcwd()),\n",
362
+ " per_device_train_batch_size=16,\n",
363
+ " num_train_epochs=5,\n",
364
+ " per_device_eval_batch_size=16,\n",
365
+ " predict_with_generate=True,\n",
366
+ " evaluation_strategy=\"epoch\",\n",
367
+ " do_train=True,\n",
368
+ " do_eval=True,\n",
369
+ " logging_steps=500,\n",
370
+ " save_strategy=\"epoch\",\n",
371
+ " #save_steps=1000,\n",
372
+ " #eval_steps=1000,\n",
373
+ " overwrite_output_dir=True,\n",
374
+ " save_total_limit=3,\n",
375
+ " load_best_model_at_end=True,\n",
376
+ " push_to_hub=True\n",
377
+ " #fp16=True, \n",
378
+ ")"
379
+ ]
380
+ },
381
+ {
382
+ "cell_type": "code",
383
+ "execution_count": 16,
384
+ "id": "4ee61c54",
385
+ "metadata": {},
386
+ "outputs": [
387
+ {
388
+ "name": "stderr",
389
+ "output_type": "stream",
390
+ "text": [
391
+ "C:\\Users\\vjmar\\AppData\\Local\\Temp\\ipykernel_29244\\418146841.py:3: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. Use 'evaluate.load' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate\n",
392
+ " rouge = load_metric(\"rouge\")\n"
393
+ ]
394
+ }
395
+ ],
396
+ "source": [
397
+ "from datasets import load_metric\n",
398
+ "\n",
399
+ "rouge = load_metric(\"rouge\")\n",
400
+ "\n",
401
+ "def compute_metrics(pred):\n",
402
+ " labels_ids = pred.label_ids\n",
403
+ " pred_ids = pred.predictions\n",
404
+ "\n",
405
+ " # all unnecessary tokens are removed\n",
406
+ " pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)\n",
407
+ " labels_ids[labels_ids == -100] = tokenizer.pad_token_id\n",
408
+ " label_str = tokenizer.batch_decode(labels_ids, skip_special_tokens=True)\n",
409
+ "\n",
410
+ " rouge_output = rouge.compute(predictions=pred_str, references=label_str, rouge_types=[\"rouge2\"])[\"rouge2\"].mid\n",
411
+ "\n",
412
+ " return {\n",
413
+ " \"rouge2_precision\": round(rouge_output.precision, 4),\n",
414
+ " \"rouge2_recall\": round(rouge_output.recall, 4),\n",
415
+ " \"rouge2_fmeasure\": round(rouge_output.fmeasure, 4),\n",
416
+ " }"
417
+ ]
418
+ },
419
+ {
420
+ "cell_type": "markdown",
421
+ "id": "f6c0f580",
422
+ "metadata": {},
423
+ "source": [
424
+ "# Define Trainer"
425
+ ]
426
+ },
427
+ {
428
+ "cell_type": "code",
429
+ "execution_count": null,
430
+ "id": "b71acd7c",
431
+ "metadata": {},
432
+ "outputs": [
433
+ {
434
+ "name": "stderr",
435
+ "output_type": "stream",
436
+ "text": [
437
+ "Cloning https://huggingface.co/vjt/T5Training into local empty directory.\n"
438
+ ]
439
+ }
440
+ ],
441
+ "source": [
442
+ "# instantiate trainer\n",
443
+ "trainer = Seq2SeqTrainer(\n",
444
+ " model=model,\n",
445
+ " args=training_args,\n",
446
+ " compute_metrics=compute_metrics,\n",
447
+ " train_dataset=train_data,\n",
448
+ " eval_dataset=test_data,\n",
449
+ ")\n",
450
+ "import os\n",
451
+ "trainer.evaluate()\n",
452
+ "trainer.train()\n",
453
+ "trainer.save_model()\n",
454
+ "tokenizer.save_pretrained(os.getcwd())\n",
455
+ "trainer.create_model_card()\n",
456
+ "trainer.push_to_hub()"
457
+ ]
458
+ },
459
+ {
460
+ "cell_type": "markdown",
461
+ "id": "76ca29ea",
462
+ "metadata": {},
463
+ "source": [
464
+ "# Test Model"
465
+ ]
466
+ },
467
+ {
468
+ "cell_type": "code",
469
+ "execution_count": null,
470
+ "id": "d39e7e80",
471
+ "metadata": {},
472
+ "outputs": [],
473
+ "source": [
474
+ "CKPT = os.join(os.getcwd(), 't5-base-finetuned-wikisql')\n",
475
+ "from transformers import AutoTokenizer, T5ForConditionalGeneration\n",
476
+ "tokenizer = AutoTokenizer.from_pretrained(CKPT)\n",
477
+ "model = T5ForConditionalGeneration.from_pretrained(CKPT)"
478
+ ]
479
+ },
480
+ {
481
+ "cell_type": "code",
482
+ "execution_count": null,
483
+ "id": "58f4258c",
484
+ "metadata": {},
485
+ "outputs": [],
486
+ "source": [
487
+ "test_data = load_dataset('wikisql', split='test')"
488
+ ]
489
+ },
490
+ {
491
+ "cell_type": "code",
492
+ "execution_count": null,
493
+ "id": "ecb1ddde",
494
+ "metadata": {},
495
+ "outputs": [],
496
+ "source": [
497
+ "def translate_to_sql(text):\n",
498
+ " inputs = tokenizer(text, padding='longest', max_length=64, return_tensors='pt')\n",
499
+ " input_ids = inputs.input_ids\n",
500
+ " attention_mask = inputs.attention_mask\n",
501
+ " output = model.generate(input_ids, attention_mask=attention_mask, max_length=64)\n",
502
+ "\n",
503
+ " return tokenizer.decode(output[0], skip_special_tokens=True)"
504
+ ]
505
+ },
506
+ {
507
+ "cell_type": "code",
508
+ "execution_count": null,
509
+ "id": "506e28e2",
510
+ "metadata": {},
511
+ "outputs": [],
512
+ "source": [
513
+ "for i in range(0,100,10):\n",
514
+ " print('translate to SQL: ' + test_data[i]['question'])\n",
515
+ " print('Predict. :' + translate_to_sql('translate to SQL: ' + test_data[i]['question']))\n",
516
+ " print('Expected: ' + test_data[i]['sql']['human_readable'])\n",
517
+ " print('=================================\\n')"
518
+ ]
519
+ },
520
+ {
521
+ "cell_type": "code",
522
+ "execution_count": null,
523
+ "id": "18f1cdfe",
524
+ "metadata": {},
525
+ "outputs": [],
526
+ "source": [
527
+ "text = \"translate to SQL: Which employee has the highest salary? Columns: employee_id, name, year, parameters, engineer\"\n",
528
+ "translate_to_sql(text)"
529
+ ]
530
+ },
531
+ {
532
+ "cell_type": "code",
533
+ "execution_count": null,
534
+ "id": "8bd0a073",
535
+ "metadata": {},
536
+ "outputs": [],
537
+ "source": []
538
+ }
539
+ ],
540
+ "metadata": {
541
+ "kernelspec": {
542
+ "display_name": "Python 3 (ipykernel)",
543
+ "language": "python",
544
+ "name": "python3"
545
+ },
546
+ "language_info": {
547
+ "codemirror_mode": {
548
+ "name": "ipython",
549
+ "version": 3
550
+ },
551
+ "file_extension": ".py",
552
+ "mimetype": "text/x-python",
553
+ "name": "python",
554
+ "nbconvert_exporter": "python",
555
+ "pygments_lexer": "ipython3",
556
+ "version": "3.8.5"
557
+ }
558
+ },
559
+ "nbformat": 4,
560
+ "nbformat_minor": 5
561
+ }
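
The notebook above fine-tunes t5-base on WikiSQL and pushes the result to the vjt/T5Training repo that the Trainer clones during setup. As a rough guide to using that checkpoint once training finishes, here is a minimal inference sketch (assuming the push completes and the repo is accessible; the prompt layout mirrors format_dataset in the notebook, and the column names are made up for illustration):

    # Sketch: query the fine-tuned checkpoint pushed by the notebook.
    # Assumes the Hub repo "vjt/T5Training" holds a finished model;
    # the prompt format follows format_dataset above.
    from transformers import AutoTokenizer, T5ForConditionalGeneration

    repo_id = "vjt/T5Training"
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    model = T5ForConditionalGeneration.from_pretrained(repo_id)

    prompt = ("translate to SQL: Which employee has the highest salary? "
              "| table: employee_id, name, salary")
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=64)
    output_ids = model.generate(**inputs, max_length=64)
    print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
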
T5Train.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,60 @@
1
+ {
2
+ "_name_or_path": "t5-base",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "d_ff": 3072,
7
+ "d_kv": 64,
8
+ "d_model": 768,
9
+ "decoder_start_token_id": 0,
10
+ "dense_act_fn": "relu",
11
+ "dropout_rate": 0.1,
12
+ "eos_token_id": 1,
13
+ "feed_forward_proj": "relu",
14
+ "initializer_factor": 1.0,
15
+ "is_encoder_decoder": true,
16
+ "is_gated_act": false,
17
+ "layer_norm_epsilon": 1e-06,
18
+ "model_type": "t5",
19
+ "n_positions": 512,
20
+ "num_decoder_layers": 12,
21
+ "num_heads": 12,
22
+ "num_layers": 12,
23
+ "output_past": true,
24
+ "pad_token_id": 0,
25
+ "relative_attention_max_distance": 128,
26
+ "relative_attention_num_buckets": 32,
27
+ "task_specific_params": {
28
+ "summarization": {
29
+ "early_stopping": true,
30
+ "length_penalty": 2.0,
31
+ "max_length": 200,
32
+ "min_length": 30,
33
+ "no_repeat_ngram_size": 3,
34
+ "num_beams": 4,
35
+ "prefix": "summarize: "
36
+ },
37
+ "translation_en_to_de": {
38
+ "early_stopping": true,
39
+ "max_length": 300,
40
+ "num_beams": 4,
41
+ "prefix": "translate English to German: "
42
+ },
43
+ "translation_en_to_fr": {
44
+ "early_stopping": true,
45
+ "max_length": 300,
46
+ "num_beams": 4,
47
+ "prefix": "translate English to French: "
48
+ },
49
+ "translation_en_to_ro": {
50
+ "early_stopping": true,
51
+ "max_length": 300,
52
+ "num_beams": 4,
53
+ "prefix": "translate English to Romanian: "
54
+ }
55
+ },
56
+ "torch_dtype": "float32",
57
+ "transformers_version": "4.26.0",
58
+ "use_cache": true,
59
+ "vocab_size": 32128
60
+ }
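
config.json records a standard t5-base architecture (d_model 768, 12 encoder and 12 decoder layers, 12 heads, 32128-token vocabulary). A small sketch for inspecting it programmatically, assuming transformers is installed and the file sits in the current directory:

    # Sketch: load the committed config and print the key dimensions.
    # Assumes ./config.json is the file added in this commit.
    from transformers import T5Config

    config = T5Config.from_pretrained(".")
    print(config.d_model, config.num_layers, config.num_decoder_layers, config.num_heads)
    print(config.vocab_size, config.d_ff)
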
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0230022aa7695bf04abd9e9df4c8dc672585e9502d74cf44536af76b08d462b3
3
+ size 891702929
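
pytorch_model.bin is tracked with Git LFS, so the three lines above are only a pointer; the oid is the SHA-256 of the actual ~892 MB weights file. A quick integrity check after fetching the real file with git lfs pull (a sketch, not part of the original commit):

    # Sketch: verify the LFS object against the sha256 oid in its pointer file.
    # Assumes git lfs pull has replaced the pointer with the real weights.
    import hashlib

    expected = "0230022aa7695bf04abd9e9df4c8dc672585e9502d74cf44536af76b08d462b3"

    h = hashlib.sha256()
    with open("pytorch_model.bin", "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)

    print(h.hexdigest() == expected)
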
runs/Jan27_10-28-12_Vince-Desktop/1674815969.93369/events.out.tfevents.1674815969.Vince-Desktop.29244.1 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:153031d0285df04013da2988af5558a36310102c42fcbde5500da89058adbe4d
3
+ size 6026
runs/Jan27_10-28-12_Vince-Desktop/events.out.tfevents.1674815969.Vince-Desktop.29244.0 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc6cbc5aab81cde1107d9d8466367cb802b080ba4d8b81e3d0a462dbd7404e78
3
+ size 6961
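
The two files under runs/ are TensorBoard event logs written by the Trainer during this epoch. They can be viewed with tensorboard --logdir runs, or read directly in Python; a sketch using TensorBoard's event reader (the tag names depend on what the run actually logged):

    # Sketch: read logged scalars from the committed run directory.
    # Assumes the tensorboard package is installed.
    from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

    ea = EventAccumulator("runs/Jan27_10-28-12_Vince-Desktop")
    ea.Reload()
    tags = ea.Tags()["scalars"]
    print(tags)  # e.g. train/loss, eval/loss (run-dependent)
    if tags:
        for event in ea.Scalars(tags[0]):
            print(event.step, event.value)
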
training_args.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ece606258a66c05a9a0c6ef55f2f476fa63347892d0d444978458027c340a25
3
+ size 3707
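
training_args.bin is the Seq2SeqTrainingArguments object that the Trainer pickles next to each checkpoint. It can be reloaded to recover the exact hyperparameters used for this run (a sketch; transformers must be importable because unpickling reconstructs the arguments class):

    # Sketch: inspect the saved training arguments.
    # The file is a pickled Python object, not a tensor state dict, so
    # weights_only must stay False (the default on older PyTorch releases).
    import torch

    args = torch.load("training_args.bin", weights_only=False)
    print(args.num_train_epochs, args.per_device_train_batch_size, args.save_strategy)
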