{ "cells": [ { "cell_type": "code", "execution_count": 2, "id": "c8ba5338", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import keras\n", "import tensorflow as tf\n", "from tensorflow import keras\n", "from tensorflow.keras import layers\n", "import datetime\n", "from packaging import version\n", "\n", "\n", "from enum import Enum\n", "\n", "import os\n", "import gc" ] }, { "cell_type": "code", "execution_count": 4, "id": "1ef43cba", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "TensorFlow version: 2.9.1\n", "Found GPU at: /device:GPU:0\n" ] } ], "source": [ "print(\"TensorFlow version: \", tf.__version__)\n", "device_name = tf.test.gpu_device_name()\n", "if not device_name:\n", " raise SystemError('GPU device not found')\n", "print('Found GPU at: {}'.format(device_name))\n" ] }, { "cell_type": "code", "execution_count": 8, "id": "27a42a63", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "x_0.npy\n", "x_1.npy\n", "x_10.npy\n", "x_100.npy\n", "x_101.npy\n", "x_102.npy\n", "x_103.npy\n", "x_104.npy\n", "x_105.npy\n", "x_106.npy\n", "x_107.npy\n", "x_108.npy\n", "x_109.npy\n", "x_11.npy\n", "x_110.npy\n", "x_111.npy\n", "x_112.npy\n", "x_113.npy\n", "x_114.npy\n", "x_115.npy\n", "x_116.npy\n", "x_117.npy\n", "x_118.npy\n", "x_119.npy\n", "x_12.npy\n", "x_120.npy\n", "x_121.npy\n", "x_122.npy\n", "x_123.npy\n", "x_124.npy\n", "x_125.npy\n", "x_126.npy\n", "x_127.npy\n", "x_128.npy\n", "x_129.npy\n", "x_13.npy\n", "x_130.npy\n", "x_131.npy\n", "x_132.npy\n", "x_133.npy\n", "x_134.npy\n", "x_135.npy\n", "x_136.npy\n", "x_137.npy\n", "x_138.npy\n", "x_139.npy\n", "x_14.npy\n", "x_140.npy\n", "x_141.npy\n", "x_142.npy\n", "x_143.npy\n", "x_144.npy\n", "x_145.npy\n", "x_146.npy\n", "x_147.npy\n", "x_148.npy\n", "x_149.npy\n", "x_15.npy\n", "x_150.npy\n", "x_151.npy\n", "x_152.npy\n", "x_153.npy\n", "x_154.npy\n", "x_155.npy\n", "x_156.npy\n", "x_157.npy\n", "x_158.npy\n", "x_159.npy\n", "x_16.npy\n", "x_160.npy\n", "x_161.npy\n", "x_162.npy\n", "x_163.npy\n", "x_164.npy\n", "x_165.npy\n", "x_166.npy\n", "x_167.npy\n", "x_168.npy\n", "x_169.npy\n", "x_17.npy\n", "x_170.npy\n", "x_171.npy\n", "x_172.npy\n", "x_173.npy\n", "x_174.npy\n", "x_175.npy\n", "x_176.npy\n", "x_177.npy\n", "x_178.npy\n", "x_179.npy\n", "x_18.npy\n", "x_180.npy\n", "x_181.npy\n", "x_182.npy\n", "x_183.npy\n", "x_184.npy\n", "x_185.npy\n", "x_186.npy\n", "x_187.npy\n", "x_188.npy\n", "x_189.npy\n", "x_19.npy\n", "x_190.npy\n", "x_191.npy\n", "x_192.npy\n", "x_193.npy\n", "x_194.npy\n", "x_195.npy\n", "x_196.npy\n", "x_197.npy\n", "x_198.npy\n", "x_199.npy\n", "x_2.npy\n", "x_20.npy\n", "x_200.npy\n", "x_201.npy\n", "x_202.npy\n", "x_203.npy\n", "x_204.npy\n", "x_205.npy\n", "x_206.npy\n", "x_207.npy\n", "x_208.npy\n", "x_209.npy\n", "x_21.npy\n", "x_210.npy\n", "x_211.npy\n", "x_212.npy\n", "x_213.npy\n", "x_214.npy\n", "x_215.npy\n", "x_216.npy\n", "x_217.npy\n", "x_218.npy\n", "x_219.npy\n", "x_22.npy\n", "x_220.npy\n", "x_23.npy\n", "x_24.npy\n", "x_25.npy\n", "x_26.npy\n", "x_27.npy\n", "x_28.npy\n", "x_29.npy\n", "x_3.npy\n", "x_30.npy\n", "x_31.npy\n", "x_32.npy\n", "x_33.npy\n", "x_34.npy\n", "x_35.npy\n", "x_36.npy\n", "x_37.npy\n", "x_38.npy\n", "x_39.npy\n", "x_4.npy\n", "x_40.npy\n", "x_41.npy\n", "x_42.npy\n", "x_43.npy\n", "x_44.npy\n", "x_45.npy\n", "x_46.npy\n", "x_47.npy\n", "x_48.npy\n", "x_49.npy\n", "x_5.npy\n", "x_50.npy\n", "x_51.npy\n", "x_52.npy\n", "x_53.npy\n", "x_54.npy\n", "x_55.npy\n", "x_56.npy\n", 
"x_57.npy\n", "x_58.npy\n", "x_59.npy\n", "x_6.npy\n", "x_60.npy\n", "x_61.npy\n", "x_62.npy\n", "x_63.npy\n", "x_64.npy\n", "x_65.npy\n", "x_66.npy\n", "x_67.npy\n", "x_68.npy\n", "x_69.npy\n", "x_7.npy\n", "x_70.npy\n", "x_71.npy\n", "x_72.npy\n", "x_73.npy\n", "x_74.npy\n", "x_75.npy\n", "x_76.npy\n", "x_77.npy\n", "x_78.npy\n", "x_79.npy\n", "x_8.npy\n", "x_80.npy\n", "x_81.npy\n", "x_82.npy\n", "x_83.npy\n", "x_84.npy\n", "x_85.npy\n", "x_86.npy\n", "x_87.npy\n", "x_88.npy\n", "x_89.npy\n", "x_9.npy\n", "x_90.npy\n", "x_91.npy\n", "x_92.npy\n", "x_93.npy\n", "x_94.npy\n", "x_95.npy\n", "x_96.npy\n", "x_97.npy\n", "x_98.npy\n", "x_99.npy\n", "y_0.npy\n", "y_1.npy\n", "y_10.npy\n", "y_100.npy\n", "y_101.npy\n", "y_102.npy\n", "y_103.npy\n", "y_104.npy\n", "y_105.npy\n", "y_106.npy\n", "y_107.npy\n", "y_108.npy\n", "y_109.npy\n", "y_11.npy\n", "y_110.npy\n", "y_111.npy\n", "y_112.npy\n", "y_113.npy\n", "y_114.npy\n", "y_115.npy\n", "y_116.npy\n", "y_117.npy\n", "y_118.npy\n", "y_119.npy\n", "y_12.npy\n", "y_120.npy\n", "y_121.npy\n", "y_122.npy\n", "y_123.npy\n", "y_124.npy\n", "y_125.npy\n", "y_126.npy\n", "y_127.npy\n", "y_128.npy\n", "y_129.npy\n", "y_13.npy\n", "y_130.npy\n", "y_131.npy\n", "y_132.npy\n", "y_133.npy\n", "y_134.npy\n", "y_135.npy\n", "y_136.npy\n", "y_137.npy\n", "y_138.npy\n", "y_139.npy\n", "y_14.npy\n", "y_140.npy\n", "y_141.npy\n", "y_142.npy\n", "y_143.npy\n", "y_144.npy\n", "y_145.npy\n", "y_146.npy\n", "y_147.npy\n", "y_148.npy\n", "y_149.npy\n", "y_15.npy\n", "y_150.npy\n", "y_151.npy\n", "y_152.npy\n", "y_153.npy\n", "y_154.npy\n", "y_155.npy\n", "y_156.npy\n", "y_157.npy\n", "y_158.npy\n", "y_159.npy\n", "y_16.npy\n", "y_160.npy\n", "y_161.npy\n", "y_162.npy\n", "y_163.npy\n", "y_164.npy\n", "y_165.npy\n", "y_166.npy\n", "y_167.npy\n", "y_168.npy\n", "y_169.npy\n", "y_17.npy\n", "y_170.npy\n", "y_171.npy\n", "y_172.npy\n", "y_173.npy\n", "y_174.npy\n", "y_175.npy\n", "y_176.npy\n", "y_177.npy\n", "y_178.npy\n", "y_179.npy\n", "y_18.npy\n", "y_180.npy\n", "y_181.npy\n", "y_182.npy\n", "y_183.npy\n", "y_184.npy\n", "y_185.npy\n", "y_186.npy\n", "y_187.npy\n", "y_188.npy\n", "y_189.npy\n", "y_19.npy\n", "y_190.npy\n", "y_191.npy\n", "y_192.npy\n", "y_193.npy\n", "y_194.npy\n", "y_195.npy\n", "y_196.npy\n", "y_197.npy\n", "y_198.npy\n", "y_199.npy\n", "y_2.npy\n", "y_20.npy\n", "y_200.npy\n", "y_201.npy\n", "y_202.npy\n", "y_203.npy\n", "y_204.npy\n", "y_205.npy\n", "y_206.npy\n", "y_207.npy\n", "y_208.npy\n", "y_209.npy\n", "y_21.npy\n", "y_210.npy\n", "y_211.npy\n", "y_212.npy\n", "y_213.npy\n", "y_214.npy\n", "y_215.npy\n", "y_216.npy\n", "y_217.npy\n", "y_218.npy\n", "y_219.npy\n", "y_22.npy\n", "y_220.npy\n", "y_23.npy\n", "y_24.npy\n", "y_25.npy\n", "y_26.npy\n", "y_27.npy\n", "y_28.npy\n", "y_29.npy\n", "y_3.npy\n", "y_30.npy\n", "y_31.npy\n", "y_32.npy\n", "y_33.npy\n", "y_34.npy\n", "y_35.npy\n", "y_36.npy\n", "y_37.npy\n", "y_38.npy\n", "y_39.npy\n", "y_4.npy\n", "y_40.npy\n", "y_41.npy\n", "y_42.npy\n", "y_43.npy\n", "y_44.npy\n", "y_45.npy\n", "y_46.npy\n", "y_47.npy\n", "y_48.npy\n", "y_49.npy\n", "y_5.npy\n", "y_50.npy\n", "y_51.npy\n", "y_52.npy\n", "y_53.npy\n", "y_54.npy\n", "y_55.npy\n", "y_56.npy\n", "y_57.npy\n", "y_58.npy\n", "y_59.npy\n", "y_6.npy\n", "y_60.npy\n", "y_61.npy\n", "y_62.npy\n", "y_63.npy\n", "y_64.npy\n", "y_65.npy\n", "y_66.npy\n", "y_67.npy\n", "y_68.npy\n", "y_69.npy\n", "y_7.npy\n", "y_70.npy\n", "y_71.npy\n", "y_72.npy\n", "y_73.npy\n", "y_74.npy\n", "y_75.npy\n", "y_76.npy\n", "y_77.npy\n", "y_78.npy\n", "y_79.npy\n", 
"y_8.npy\n", "y_80.npy\n", "y_81.npy\n", "y_82.npy\n", "y_83.npy\n", "y_84.npy\n", "y_85.npy\n", "y_86.npy\n", "y_87.npy\n", "y_88.npy\n", "y_89.npy\n", "y_9.npy\n", "y_90.npy\n", "y_91.npy\n", "y_92.npy\n", "y_93.npy\n", "y_94.npy\n", "y_95.npy\n", "y_96.npy\n", "y_97.npy\n", "y_98.npy\n", "y_99.npy\n" ] } ], "source": [ "data_path = \"data\"\n", "for file in os.listdir(data_path):\n", " print(file)\n" ] }, { "cell_type": "code", "execution_count": 2, "id": "18b502b1", "metadata": {}, "outputs": [], "source": [ "n_classes = 2\n", "def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):\n", " # Normalization and Attention\n", " x = layers.LayerNormalization(epsilon=1e-6)(inputs)\n", " x = layers.MultiHeadAttention(\n", " key_dim=head_size, num_heads=num_heads, dropout=dropout\n", " )(x, x)\n", " x = layers.Dropout(dropout)(x)\n", " res = x + inputs\n", "\n", " # Feed Forward Part\n", " x = layers.LayerNormalization(epsilon=1e-6)(res)\n", " x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation=\"relu\")(x)\n", " x = layers.Dropout(dropout)(x)\n", " x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)\n", " return x + res\n", "\n", "def build_model(\n", " input_shape,\n", " head_size,\n", " num_heads,\n", " ff_dim,\n", " num_transformer_blocks,\n", " mlp_units,\n", " dropout=0,\n", " mlp_dropout=0,\n", "):\n", " inputs = keras.Input(shape=input_shape)\n", " x = inputs\n", " for _ in range(num_transformer_blocks):\n", " x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)\n", "\n", " x = layers.GlobalAveragePooling1D(data_format=\"channels_first\")(x)\n", " for dim in mlp_units:\n", " x = layers.Dense(dim, activation=\"relu\")(x)\n", " x = layers.Dropout(mlp_dropout)(x)\n", " #changed from softmax to relu\n", " outputs = layers.Dense(n_classes, activation=\"relu\")(x)\n", " return keras.Model(inputs, outputs)" ] }, { "cell_type": "code", "execution_count": 3, "id": "a9ce858a", "metadata": {}, "outputs": [], "source": [ "# optional tensorboard training metrics\n", "log_dir = \"logs/fit/\"\n", "tensorboard_callback = tboard_callback = tf.keras.callbacks.TensorBoard(log_dir = log_dir,\n", " histogram_freq = 1,\n", " profile_batch = '500,520')" ] }, { "cell_type": "code", "execution_count": 8, "id": "0b4c42e4", "metadata": {}, "outputs": [], "source": [ "model = build_model((100,1), \n", " head_size=25,\n", " num_heads=4,\n", " ff_dim=4,\n", " num_transformer_blocks=6,\n", " mlp_units=[80,50,30], #\n", " mlp_dropout=0,\n", " dropout=0)\n", "model.compile(\n", " loss=\"mae\",\n", " optimizer=keras.optimizers.Adam(learning_rate=1e-4),\n", " metrics=[\"mae\"],\n", ")" ] }, { "cell_type": "code", "execution_count": 2, "id": "22a03f0d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: \"model\"\n", "__________________________________________________________________________________________________\n", " Layer (type) Output Shape Param # Connected to \n", "==================================================================================================\n", " input_1 (InputLayer) [(None, 100, 1)] 0 [] \n", " \n", " layer_normalization (LayerNorm (None, 100, 1) 2 ['input_1[0][0]'] \n", " alization) \n", " \n", " multi_head_attention (MultiHea (None, 100, 1) 701 ['layer_normalization[0][0]', \n", " dAttention) 'layer_normalization[0][0]'] \n", " \n", " dropout (Dropout) (None, 100, 1) 0 ['multi_head_attention[0][0]'] \n", " \n", " tf.__operators__.add (TFOpLamb (None, 100, 1) 0 ['dropout[0][0]', \n", " da) 
'input_1[0][0]'] \n", " \n", " layer_normalization_1 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add[0][0]'] \n", " rmalization) \n", " \n", " conv1d (Conv1D) (None, 100, 4) 8 ['layer_normalization_1[0][0]'] \n", " \n", " dropout_1 (Dropout) (None, 100, 4) 0 ['conv1d[0][0]'] \n", " \n", " conv1d_1 (Conv1D) (None, 100, 1) 5 ['dropout_1[0][0]'] \n", " \n", " tf.__operators__.add_1 (TFOpLa (None, 100, 1) 0 ['conv1d_1[0][0]', \n", " mbda) 'tf.__operators__.add[0][0]'] \n", " \n", " layer_normalization_2 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_1[0][0]'] \n", " rmalization) \n", " \n", " multi_head_attention_1 (MultiH (None, 100, 1) 701 ['layer_normalization_2[0][0]', \n", " eadAttention) 'layer_normalization_2[0][0]'] \n", " \n", " dropout_2 (Dropout) (None, 100, 1) 0 ['multi_head_attention_1[0][0]'] \n", " \n", " tf.__operators__.add_2 (TFOpLa (None, 100, 1) 0 ['dropout_2[0][0]', \n", " mbda) 'tf.__operators__.add_1[0][0]'] \n", " \n", " layer_normalization_3 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_2[0][0]'] \n", " rmalization) \n", " \n", " conv1d_2 (Conv1D) (None, 100, 4) 8 ['layer_normalization_3[0][0]'] \n", " \n", " dropout_3 (Dropout) (None, 100, 4) 0 ['conv1d_2[0][0]'] \n", " \n", " conv1d_3 (Conv1D) (None, 100, 1) 5 ['dropout_3[0][0]'] \n", " \n", " tf.__operators__.add_3 (TFOpLa (None, 100, 1) 0 ['conv1d_3[0][0]', \n", " mbda) 'tf.__operators__.add_2[0][0]'] \n", " \n", " layer_normalization_4 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_3[0][0]'] \n", " rmalization) \n", " \n", " multi_head_attention_2 (MultiH (None, 100, 1) 701 ['layer_normalization_4[0][0]', \n", " eadAttention) 'layer_normalization_4[0][0]'] \n", " \n", " dropout_4 (Dropout) (None, 100, 1) 0 ['multi_head_attention_2[0][0]'] \n", " \n", " tf.__operators__.add_4 (TFOpLa (None, 100, 1) 0 ['dropout_4[0][0]', \n", " mbda) 'tf.__operators__.add_3[0][0]'] \n", " \n", " layer_normalization_5 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_4[0][0]'] \n", " rmalization) \n", " \n", " conv1d_4 (Conv1D) (None, 100, 4) 8 ['layer_normalization_5[0][0]'] \n", " \n", " dropout_5 (Dropout) (None, 100, 4) 0 ['conv1d_4[0][0]'] \n", " \n", " conv1d_5 (Conv1D) (None, 100, 1) 5 ['dropout_5[0][0]'] \n", " \n", " tf.__operators__.add_5 (TFOpLa (None, 100, 1) 0 ['conv1d_5[0][0]', \n", " mbda) 'tf.__operators__.add_4[0][0]'] \n", " \n", " layer_normalization_6 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_5[0][0]'] \n", " rmalization) \n", " \n", " multi_head_attention_3 (MultiH (None, 100, 1) 701 ['layer_normalization_6[0][0]', \n", " eadAttention) 'layer_normalization_6[0][0]'] \n", " \n", " dropout_6 (Dropout) (None, 100, 1) 0 ['multi_head_attention_3[0][0]'] \n", " \n", " tf.__operators__.add_6 (TFOpLa (None, 100, 1) 0 ['dropout_6[0][0]', \n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " mbda) 'tf.__operators__.add_5[0][0]'] \n", " \n", " layer_normalization_7 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_6[0][0]'] \n", " rmalization) \n", " \n", " conv1d_6 (Conv1D) (None, 100, 4) 8 ['layer_normalization_7[0][0]'] \n", " \n", " dropout_7 (Dropout) (None, 100, 4) 0 ['conv1d_6[0][0]'] \n", " \n", " conv1d_7 (Conv1D) (None, 100, 1) 5 ['dropout_7[0][0]'] \n", " \n", " tf.__operators__.add_7 (TFOpLa (None, 100, 1) 0 ['conv1d_7[0][0]', \n", " mbda) 'tf.__operators__.add_6[0][0]'] \n", " \n", " layer_normalization_8 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_7[0][0]'] \n", " rmalization) \n", " \n", " multi_head_attention_4 (MultiH (None, 100, 1) 701 ['layer_normalization_8[0][0]', \n", " 
eadAttention) 'layer_normalization_8[0][0]'] \n", " \n", " dropout_8 (Dropout) (None, 100, 1) 0 ['multi_head_attention_4[0][0]'] \n", " \n", " tf.__operators__.add_8 (TFOpLa (None, 100, 1) 0 ['dropout_8[0][0]', \n", " mbda) 'tf.__operators__.add_7[0][0]'] \n", " \n", " layer_normalization_9 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_8[0][0]'] \n", " rmalization) \n", " \n", " conv1d_8 (Conv1D) (None, 100, 4) 8 ['layer_normalization_9[0][0]'] \n", " \n", " dropout_9 (Dropout) (None, 100, 4) 0 ['conv1d_8[0][0]'] \n", " \n", " conv1d_9 (Conv1D) (None, 100, 1) 5 ['dropout_9[0][0]'] \n", " \n", " tf.__operators__.add_9 (TFOpLa (None, 100, 1) 0 ['conv1d_9[0][0]', \n", " mbda) 'tf.__operators__.add_8[0][0]'] \n", " \n", " layer_normalization_10 (LayerN (None, 100, 1) 2 ['tf.__operators__.add_9[0][0]'] \n", " ormalization) \n", " \n", " multi_head_attention_5 (MultiH (None, 100, 1) 701 ['layer_normalization_10[0][0]', \n", " eadAttention) 'layer_normalization_10[0][0]'] \n", " \n", " dropout_10 (Dropout) (None, 100, 1) 0 ['multi_head_attention_5[0][0]'] \n", " \n", " tf.__operators__.add_10 (TFOpL (None, 100, 1) 0 ['dropout_10[0][0]', \n", " ambda) 'tf.__operators__.add_9[0][0]'] \n", " \n", " layer_normalization_11 (LayerN (None, 100, 1) 2 ['tf.__operators__.add_10[0][0]']\n", " ormalization) \n", " \n", " conv1d_10 (Conv1D) (None, 100, 4) 8 ['layer_normalization_11[0][0]'] \n", " \n", " dropout_11 (Dropout) (None, 100, 4) 0 ['conv1d_10[0][0]'] \n", " \n", " conv1d_11 (Conv1D) (None, 100, 1) 5 ['dropout_11[0][0]'] \n", " \n", " tf.__operators__.add_11 (TFOpL (None, 100, 1) 0 ['conv1d_11[0][0]', \n", " ambda) 'tf.__operators__.add_10[0][0]']\n", " \n", " global_average_pooling1d (Glob (None, 100) 0 ['tf.__operators__.add_11[0][0]']\n", " alAveragePooling1D) \n", " \n", " dense (Dense) (None, 80) 8080 ['global_average_pooling1d[0][0]'\n", " ] \n", " \n", " dropout_12 (Dropout) (None, 80) 0 ['dense[0][0]'] \n", " \n", " dense_1 (Dense) (None, 50) 4050 ['dropout_12[0][0]'] \n", " \n", " dropout_13 (Dropout) (None, 50) 0 ['dense_1[0][0]'] \n", " \n", " dense_2 (Dense) (None, 30) 1530 ['dropout_13[0][0]'] \n", " \n", " dropout_14 (Dropout) (None, 30) 0 ['dense_2[0][0]'] \n", " \n", " dense_3 (Dense) (None, 2) 62 ['dropout_14[0][0]'] \n", " \n", "==================================================================================================\n", "Total params: 18,030\n", "Trainable params: 18,030\n", "Non-trainable params: 0\n", "__________________________________________________________________________________________________\n", "0\n", "1\n", "2\n", "3\n", "4\n", "5\n", "6\n", "7\n", "8\n", "9\n", "(772051, 100, 1)\n", "6032/6032 [==============================] - 283s 45ms/step - loss: 0.0187 - mae: 0.0187 - val_loss: 0.0197 - val_mae: 0.0197\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "WARNING:absl:Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op while saving (showing 5 of 84). 
These functions will not be directly callable after loading.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: transformer-steven-v5\\assets\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: transformer-steven-v5\\assets\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "10\n", "11\n", "12\n", "13\n", "14\n", "15\n", "16\n", "17\n", "18\n", "19\n", "(851081, 100, 1)\n", "6650/6650 [==============================] - 295s 44ms/step - loss: 0.0178 - mae: 0.0178 - val_loss: 0.0196 - val_mae: 0.0196\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "WARNING:absl:Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op while saving (showing 5 of 84). These functions will not be directly callable after loading.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: transformer-steven-v5\\assets\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: transformer-steven-v5\\assets\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "20\n", "21\n", "22\n", "23\n", "24\n", "25\n", "26\n", "27\n", "28\n", "29\n", "(674329, 100, 1)\n", "5269/5269 [==============================] - 232s 44ms/step - loss: 0.0192 - mae: 0.0192 - val_loss: 0.0195 - val_mae: 0.0195\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "WARNING:absl:Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op while saving (showing 5 of 84). These functions will not be directly callable after loading.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: transformer-steven-v5\\assets\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: transformer-steven-v5\\assets\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "30\n", "31\n", "32\n", "33\n", "34\n", "35\n", "36\n", "37\n", "38\n", "39\n", "(811767, 100, 1)\n", "6342/6342 [==============================] - 278s 44ms/step - loss: 0.0207 - mae: 0.0207 - val_loss: 0.0200 - val_mae: 0.0200\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "WARNING:absl:Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op while saving (showing 5 of 84). 
These functions will not be directly callable after loading.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: transformer-steven-v5\\assets\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: transformer-steven-v5\\assets\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "40\n", "41\n", "42\n", "43\n", "44\n", "45\n", "46\n", "47\n", "48\n", "49\n", "(884139, 100, 1)\n", "6908/6908 [==============================] - 302s 44ms/step - loss: 0.0198 - mae: 0.0198 - val_loss: 0.0196 - val_mae: 0.0196\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "WARNING:absl:Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op while saving (showing 5 of 84). These functions will not be directly callable after loading.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: transformer-steven-v5\\assets\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: transformer-steven-v5\\assets\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "50\n", "51\n", "52\n", "53\n", "54\n", "55\n", "56\n", "57\n", "58\n", "59\n", "(1186323, 100, 1)\n", "9218/9269 [============================>.] - ETA: 2s - loss: 0.0224 - mae: 0.0224" ] }, { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "---------------------------------------------------------------------------", "KeyboardInterrupt                         Traceback (most recent call last)", "Input In [2], in <module>: model.fit(x, y, batch_size = 128, validation_data = (x_val, y_val), epochs = 1)", "File ~\\anaconda3\\lib\\site-packages\\keras\\engine\\training.py:1409, in Model.fit", "File ~\\anaconda3\\lib\\site-packages\\tensorflow\\python\\eager\\execute.py:54, in quick_execute", "KeyboardInterrupt: " ] } ], "source": [ "model = keras.models.load_model(\"transformer-steven-v5\")\n", "\n", "model.summary()\n", "\n", "# let's make the last batch the validation (files 200-220 inclusive)\n", "x_val = np.load(\"data/x_200.npy\")\n", "y_val = np.load(\"data/y_200.npy\")\n", "for g in range(201, 221):\n", "    a = np.load(\"data/x_\" + str(g) + \".npy\")\n", "    b = np.load(\"data/y_\" + str(g) + \".npy\")\n", "    x_val = np.concatenate((x_val, a))\n", "    y_val = np.concatenate((y_val, b))\n", "x_val = x_val.reshape((x_val.shape[0], x_val.shape[1], 1))\n", "y_val = y_val.reshape((y_val.shape[0], y_val.shape[1], 1))\n", "\n", "for k in range(50):  # passes over the whole training set\n", "    for i in range(20):  # 20 groups of 10 training files (0-199)\n", "        x = None\n", "        y = None\n", "        gc.collect()\n", "        for j in range(10):\n", "            print(10 * i + j)\n", "            new_x = np.load(\"data/x_\" + str(10 * i + j) + \".npy\")\n", "            new_y = np.load(\"data/y_\" + str(10 * i + j) + \".npy\")\n", "            if x is None and y is None:\n", "                x = new_x\n", "                y = new_y\n", "            else:\n", "                x = np.concatenate((x, new_x))\n", "                y = np.concatenate((y, new_y))\n", "        # this will need to be optimized\n", "        x = x.reshape((x.shape[0], x.shape[1], 1))\n", "        y = y.reshape((y.shape[0], y.shape[1], 1))\n", "        print(x.shape)\n", "        model.fit(x, y, batch_size = 128, validation_data = (x_val, y_val), epochs = 1)\n", "        model.save(\"transformer-steven-v5\")\n", "        # call evaluate/save with/without improvement\n", "        # model optimization and look at other papers for inspiration" ] },
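{ "cell_type": "markdown", "id": "0a1b2c3d", "metadata": {}, "source": [ "The loop above reloads and re-concatenates ten `.npy` shards per group, which is the part flagged `# this will need to be optimized`. One possible alternative (a sketch under assumptions, not what was run: `shard_generator` and `train_ds` are illustrative names, and the shard arrays are assumed castable to float32) is to stream the shards through `tf.data` so only one shard sits in memory at a time:\n" ] },
{ "cell_type": "code", "execution_count": null, "id": "1b2c3d4e", "metadata": {}, "outputs": [], "source": [ "# Sketch: stream the data/x_<i>.npy, data/y_<i>.npy shards instead of concatenating.\n", "def shard_generator(ids):\n", "    for i in ids:\n", "        x = np.load(\"data/x_\" + str(i) + \".npy\").astype(np.float32).reshape(-1, 100, 1)\n", "        y = np.load(\"data/y_\" + str(i) + \".npy\").astype(np.float32)\n", "        yield x, y\n", "\n", "train_ds = (\n", "    tf.data.Dataset.from_generator(\n", "        lambda: shard_generator(range(200)),\n", "        output_signature=(\n", "            tf.TensorSpec(shape=(None, 100, 1), dtype=tf.float32),\n", "            tf.TensorSpec(shape=(None, 2), dtype=tf.float32),\n", "        ),\n", "    )\n", "    .unbatch()\n", "    .batch(128)\n", "    .prefetch(tf.data.AUTOTUNE)\n", ")\n", "# model.fit(train_ds, validation_data=(x_val, y_val), epochs=1)" ] },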
{ "cell_type": "markdown", "id": "e66455f3", "metadata": {}, "source": [ "\n" ] },
{ "cell_type": "code", "execution_count": 11, "id": "53636383", "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The tensorboard extension is already loaded. To reload it, use:\n", " %reload_ext tensorboard\n" ] }, { "data": { "text/plain": [ "Reusing TensorBoard on port 6006 (pid 13352), started 0:33:47 ago. 
(Use '!kill 13352' to kill it.)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", " \n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%load_ext tensorboard\n", "%tensorboard --logdir logs/fit" ] }, { "cell_type": "code", "execution_count": 6, "id": "bf14db9b", "metadata": {}, "outputs": [ { "ename": "SyntaxError", "evalue": "invalid syntax (2121200483.py, line 1)", "output_type": "error", "traceback": [ "\u001b[1;36m Input \u001b[1;32mIn [6]\u001b[1;36m\u001b[0m\n\u001b[1;33m tensorboard dev upload --logdir \\\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n" ] } ], "source": [ "#tensorboard dev upload --logdir \\\n", "# 'tensorboard'" ] }, { "cell_type": "code", "execution_count": 4, "id": "d6800f8c", "metadata": {}, "outputs": [], "source": [ "from musicautobot.music_transformer.transform import idxenc2stream\n", "from musicautobot.vocab import MusicVocab" ] }, { "cell_type": "code", "execution_count": 3, "id": "7f4f8033", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: \"model\"\n", "__________________________________________________________________________________________________\n", " Layer (type) Output Shape Param # Connected to \n", "==================================================================================================\n", " input_1 (InputLayer) [(None, 100, 1)] 0 [] \n", " \n", " layer_normalization (LayerNorm (None, 100, 1) 2 ['input_1[0][0]'] \n", " alization) \n", " \n", " multi_head_attention (MultiHea (None, 100, 1) 701 ['layer_normalization[0][0]', \n", " dAttention) 'layer_normalization[0][0]'] \n", " \n", " dropout (Dropout) (None, 100, 1) 0 ['multi_head_attention[0][0]'] \n", " \n", " tf.__operators__.add (TFOpLamb (None, 100, 1) 0 ['dropout[0][0]', \n", " da) 'input_1[0][0]'] \n", " \n", " layer_normalization_1 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add[0][0]'] \n", " rmalization) \n", " \n", " conv1d (Conv1D) (None, 100, 4) 8 ['layer_normalization_1[0][0]'] \n", " \n", " dropout_1 (Dropout) (None, 100, 4) 0 ['conv1d[0][0]'] \n", " \n", " conv1d_1 (Conv1D) (None, 100, 1) 5 ['dropout_1[0][0]'] \n", " \n", " tf.__operators__.add_1 (TFOpLa (None, 100, 1) 0 ['conv1d_1[0][0]', \n", " mbda) 'tf.__operators__.add[0][0]'] \n", " \n", " layer_normalization_2 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_1[0][0]'] \n", " rmalization) \n", " \n", " multi_head_attention_1 (MultiH (None, 100, 1) 701 ['layer_normalization_2[0][0]', \n", " eadAttention) 'layer_normalization_2[0][0]'] \n", " \n", " dropout_2 (Dropout) (None, 100, 1) 0 ['multi_head_attention_1[0][0]'] \n", " \n", " tf.__operators__.add_2 (TFOpLa (None, 100, 1) 0 ['dropout_2[0][0]', \n", " mbda) 'tf.__operators__.add_1[0][0]'] \n", " \n", " layer_normalization_3 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_2[0][0]'] \n", " rmalization) \n", " \n", " conv1d_2 (Conv1D) (None, 100, 4) 8 ['layer_normalization_3[0][0]'] \n", " \n", " dropout_3 (Dropout) (None, 100, 4) 0 ['conv1d_2[0][0]'] \n", " \n", " conv1d_3 (Conv1D) (None, 100, 1) 5 ['dropout_3[0][0]'] \n", " \n", " tf.__operators__.add_3 (TFOpLa (None, 100, 1) 0 ['conv1d_3[0][0]', \n", " mbda) 'tf.__operators__.add_2[0][0]'] \n", " \n", " layer_normalization_4 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_3[0][0]'] \n", " rmalization) \n", " \n", " multi_head_attention_2 (MultiH (None, 100, 1) 701 ['layer_normalization_4[0][0]', \n", " eadAttention) 
'layer_normalization_4[0][0]'] \n", " \n", " dropout_4 (Dropout) (None, 100, 1) 0 ['multi_head_attention_2[0][0]'] \n", " \n", " tf.__operators__.add_4 (TFOpLa (None, 100, 1) 0 ['dropout_4[0][0]', \n", " mbda) 'tf.__operators__.add_3[0][0]'] \n", " \n", " layer_normalization_5 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_4[0][0]'] \n", " rmalization) \n", " \n", " conv1d_4 (Conv1D) (None, 100, 4) 8 ['layer_normalization_5[0][0]'] \n", " \n", " dropout_5 (Dropout) (None, 100, 4) 0 ['conv1d_4[0][0]'] \n", " \n", " conv1d_5 (Conv1D) (None, 100, 1) 5 ['dropout_5[0][0]'] \n", " \n", " tf.__operators__.add_5 (TFOpLa (None, 100, 1) 0 ['conv1d_5[0][0]', \n", " mbda) 'tf.__operators__.add_4[0][0]'] \n", " \n", " layer_normalization_6 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_5[0][0]'] \n", " rmalization) \n", " \n", " multi_head_attention_3 (MultiH (None, 100, 1) 701 ['layer_normalization_6[0][0]', \n", " eadAttention) 'layer_normalization_6[0][0]'] \n", " \n", " dropout_6 (Dropout) (None, 100, 1) 0 ['multi_head_attention_3[0][0]'] \n", " \n", " tf.__operators__.add_6 (TFOpLa (None, 100, 1) 0 ['dropout_6[0][0]', \n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " mbda) 'tf.__operators__.add_5[0][0]'] \n", " \n", " layer_normalization_7 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_6[0][0]'] \n", " rmalization) \n", " \n", " conv1d_6 (Conv1D) (None, 100, 4) 8 ['layer_normalization_7[0][0]'] \n", " \n", " dropout_7 (Dropout) (None, 100, 4) 0 ['conv1d_6[0][0]'] \n", " \n", " conv1d_7 (Conv1D) (None, 100, 1) 5 ['dropout_7[0][0]'] \n", " \n", " tf.__operators__.add_7 (TFOpLa (None, 100, 1) 0 ['conv1d_7[0][0]', \n", " mbda) 'tf.__operators__.add_6[0][0]'] \n", " \n", " layer_normalization_8 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_7[0][0]'] \n", " rmalization) \n", " \n", " multi_head_attention_4 (MultiH (None, 100, 1) 701 ['layer_normalization_8[0][0]', \n", " eadAttention) 'layer_normalization_8[0][0]'] \n", " \n", " dropout_8 (Dropout) (None, 100, 1) 0 ['multi_head_attention_4[0][0]'] \n", " \n", " tf.__operators__.add_8 (TFOpLa (None, 100, 1) 0 ['dropout_8[0][0]', \n", " mbda) 'tf.__operators__.add_7[0][0]'] \n", " \n", " layer_normalization_9 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_8[0][0]'] \n", " rmalization) \n", " \n", " conv1d_8 (Conv1D) (None, 100, 4) 8 ['layer_normalization_9[0][0]'] \n", " \n", " dropout_9 (Dropout) (None, 100, 4) 0 ['conv1d_8[0][0]'] \n", " \n", " conv1d_9 (Conv1D) (None, 100, 1) 5 ['dropout_9[0][0]'] \n", " \n", " tf.__operators__.add_9 (TFOpLa (None, 100, 1) 0 ['conv1d_9[0][0]', \n", " mbda) 'tf.__operators__.add_8[0][0]'] \n", " \n", " layer_normalization_10 (LayerN (None, 100, 1) 2 ['tf.__operators__.add_9[0][0]'] \n", " ormalization) \n", " \n", " multi_head_attention_5 (MultiH (None, 100, 1) 701 ['layer_normalization_10[0][0]', \n", " eadAttention) 'layer_normalization_10[0][0]'] \n", " \n", " dropout_10 (Dropout) (None, 100, 1) 0 ['multi_head_attention_5[0][0]'] \n", " \n", " tf.__operators__.add_10 (TFOpL (None, 100, 1) 0 ['dropout_10[0][0]', \n", " ambda) 'tf.__operators__.add_9[0][0]'] \n", " \n", " layer_normalization_11 (LayerN (None, 100, 1) 2 ['tf.__operators__.add_10[0][0]']\n", " ormalization) \n", " \n", " conv1d_10 (Conv1D) (None, 100, 4) 8 ['layer_normalization_11[0][0]'] \n", " \n", " dropout_11 (Dropout) (None, 100, 4) 0 ['conv1d_10[0][0]'] \n", " \n", " conv1d_11 (Conv1D) (None, 100, 1) 5 ['dropout_11[0][0]'] \n", " \n", " tf.__operators__.add_11 (TFOpL (None, 100, 1) 0 ['conv1d_11[0][0]', \n", " ambda) 
'tf.__operators__.add_10[0][0]']\n", " \n", " global_average_pooling1d (Glob (None, 100) 0 ['tf.__operators__.add_11[0][0]']\n", " alAveragePooling1D) \n", " \n", " dense (Dense) (None, 80) 8080 ['global_average_pooling1d[0][0]'\n", " ] \n", " \n", " dropout_12 (Dropout) (None, 80) 0 ['dense[0][0]'] \n", " \n", " dense_1 (Dense) (None, 50) 4050 ['dropout_12[0][0]'] \n", " \n", " dropout_13 (Dropout) (None, 50) 0 ['dense_1[0][0]'] \n", " \n", " dense_2 (Dense) (None, 30) 1530 ['dropout_13[0][0]'] \n", " \n", " dropout_14 (Dropout) (None, 30) 0 ['dense_2[0][0]'] \n", " \n", " dense_3 (Dense) (None, 2) 62 ['dropout_14[0][0]'] \n", " \n", "==================================================================================================\n", "Total params: 18,030\n", "Trainable params: 18,030\n", "Non-trainable params: 0\n", "__________________________________________________________________________________________________\n" ] } ], "source": [ "# predict pipeline below\n", "\n", "model = keras.models.load_model(\"transformer-steven-final-v2\")\n", "model.summary()" ] },
{ "cell_type": "code", "execution_count": 5, "id": "beaacb59", "metadata": {}, "outputs": [], "source": [ "def predict_music(model, input_vector, num):\n", "    # scale raw token indices into [0, 1]; the same constant 311 rescales below\n", "    normalized = input_vector / 311\n", "    for i in range(num):\n", "        # autoregressively predict the next two values from the last 100 tokens\n", "        predict = model.predict(np.reshape(normalized[-100:], (1,100)), verbose = 0)\n", "        normalized = np.append(normalized, predict)\n", "\n", "    result = np.rint(normalized * 311)\n", "    # edits to prediction: even positions within 5 of token 8 are snapped to 8,\n", "    # odd positions are clamped to a floor of 137\n", "    for i in range(100, len(result)):\n", "        if i % 2 == 0:\n", "            if abs(result[i] - 8) < 5:\n", "                print(\"change made to \" + str(result[i]))\n", "                result[i] = 8\n", "        else:\n", "            if result[i] < 137:\n", "                print(\"bounded \" + str(result[i]))\n", "                result[i] = 137\n", "    return result\n" ] },
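{ "cell_type": "markdown", "id": "d4e5f6a7", "metadata": {}, "source": [ "The cleanup loop in `predict_music` can be vectorized with NumPy masks. The sketch below (not part of the original run; `clean_prediction` is an illustrative name) applies the same two rules without the per-element prints: even positions less than 5 away from token 8 are snapped to 8, and odd positions are clamped to at least 137.\n" ] },
{ "cell_type": "code", "execution_count": null, "id": "e5f6a7b8", "metadata": {}, "outputs": [], "source": [ "def clean_prediction(result):\n", "    # Vectorized equivalent of the cleanup loop in predict_music.\n", "    out = result.copy()\n", "    idx = np.arange(100, len(out))\n", "    even = idx[idx % 2 == 0]\n", "    odd = idx[idx % 2 == 1]\n", "    out[even[np.abs(out[even] - 8) < 5]] = 8  # snap near-8 tokens to 8\n", "    out[odd] = np.maximum(out[odd], 137)      # clamp to the 137 floor\n", "    return out" ] },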
"change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "[ 8. 138. 47. 137. ... 8. 137. 45. 
137.]\n", "0 1 8 161 82 143 73 149 66 147 8 138 82 148 78 148 8 140 73 139 8 139 78 139 8 139 73 140 8 139 78 139 8 139 82 142 82 141 78 141 78 141 73 146 66 147 8 141 73 140 8 139 78 140 8 139 73 139 8 139 78 139 8 138 73 138 8 138 85 149 83 141 78 141 78 149 73 158 71 147 8 141 75 139 8 139 78 139 8 139 75 139 8 139 78 139 8 139 83 142 8 141 79 141 21 141 50 140 8 140 75 140 8 140 77 139 8 140 75 140 8 140 75 139 8 140 74 140 8 141 75 140 8 141 74 140 8 141 73 140 8 141 73 140 8 140 72 140 8 140 72 140 8 140 72 140 8 140 72 140 8 140 71 140 8 141 72 140 8 141 71 140 8 141 71 140 8 140 70 140 8 140 69 139 8 140 69 139 8 140 69 140 8 140 68 140 8 140 68 139 8 140 67 139 8 140 67 139 8 140 67 139 8 140 66 139 8 140 66 139 8 140 65 139 8 140 65 139 8 140 65 139 8 140 64 139 8 139 64 139 8 139 64 139 8 139 63 139 8 139 63 139 8 139 63 139 8 139 63 139 8 139 62 139 8 139 62 139 8 139 62 139 8 139 62 139 8 139 61 139 8 139 61 139 8 139 61 139 8 139 61 139 8 139 60 139 8 139 60 139 8 139 60 139 8 139 60 139 8 139 59 139 8 138 59 139 8 138 59 138 8 138 59 138 8 138 59 138 8 138 58 138 8 138 58 138 8 138 58 138 8 138 58 138 8 138 58 138 8 138 57 138 8 138 57 138 8 138 57 138 8 138 57 138 8 138 57 138 8 138 56 138 8 138 56 138 8 138 56 138 8 138 56 138 8 138 56 138 8 138 56 138 8 138 55 138 8 138 55 138 8 138 55 138 8 138 55 138 8 138 55 138 8 138 54 138 8 138 54 138 8 138 54 138 8 138 54 138 8 138 54 138 8 138 54 138 8 138 53 138 8 138 53 138 8 138 53 138 8 138 53 138 8 138 53 138 8 138 52 138 8 138 52 138 8 138 52 138 8 138 52 138 8 138 51 138 8 138 51 138 8 138 51 138 8 138 51 138 8 138 51 138 8 138 51 138 8 138 50 138 8 138 50 138 8 138 50 138 8 138 50 138 8 138 50 138 8 138 50 138 8 138 50 138 8 138 49 138 8 138 49 138 8 138 49 138 8 138 49 137 8 138 49 137 8 138 49 137 8 138 48 137 8 138 48 137 8 138 48 137 8 138 48 137 8 138 48 137 8 138 48 137 8 138 48 137 8 138 48 137 8 138 47 137 8 138 47 137 8 138 47 137 8 138 47 137 8 138 47 137 8 138 47 137 8 138 47 137 8 138 47 137 8 138 47 137 8 138 47 137 8 138 47 137 8 138 46 137 8 137 46 137 8 137 46 137 8 137 46 137 8 137 46 137 8 137 46 137 8 137 46 137 8 137 46 137 8 137 46 137 8 137 46 137 8 137 46 137 8 137 46 137 8 137 46 137 8 137 45 137 8 137 45 137 8 137 45 137 8 137 45 137 8 137 45 137 8 137 45 137 " ] } ], "source": [ "data = np.load(\"data.npy\", allow_pickle = True)\n", "test = data[500][0:100]\n", "def midi_predict(model, test):\n", " test_midi = idxenc2stream(test.astype(\"int\"), MusicVocab.create())\n", " test_midi.write('midi',\"input.mid\")\n", " res = predict_music(model, test, 300)\n", " print(res[-100:])\n", " output = idxenc2stream(res.astype(\"int\"), MusicVocab.create())\n", " output.write('midi',\"output.mid\")\n", " return res.astype(\"int\")\n", "array = midi_predict(model, test)\n", "for elem in array:\n", " print(elem, end = \" \")" ] }, { "cell_type": "code", "execution_count": 12, "id": "3268f624", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING:absl:Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op while saving (showing 5 of 84). 
These functions will not be directly callable after loading.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: transformer-steven-final-v2\\assets\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: transformer-steven-final-v2\\assets\n" ] } ], "source": [ "model.save(\"transformer-steven-final-v2\")" ] },
{ "cell_type": "code", "execution_count": 40, "id": "235839fe", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[ 0 1 8 153 74 140 69 140 66 140 8 138 62 138 57 142 8 139\n", " 62 139 8 138 74 139 73 138 69 139 66 139 8 139 62 138 8 138\n", " 74 141 69 142 66 139 8 138 62 138 57 144 8 138 66 138 8 138\n", " 62 141 8 138 66 138 8 138 74 140 69 138 66 139 8 138 69 138\n", " 8 139 69 138 66 138 8 138 73 140 69 142 64 140 57 144 8 139\n", " 61 140 8 139 76 140 64 140 8 138]\n", "[ 0 1 8 153 74 140 69 140 66 140 8 138 62 138 57 142 8 139\n", " 62 139 8 138 74 139 73 138 69 139 66 139 8 139 62 138 8 138\n", " 74 141 69 142 66 139 8 138 62 138 57 144 8 138 66 138 8 138\n", " 62 141 8 138 66 138 8 138 74 140 69 138 66 139 8 138 69 138\n", " 8 139 69 138 66 138 8 138 73 140 69 142 64 140 57 144 8 139\n", " 61 140 8 139 76 140 64 140 8 138]\n" ] } ], "source": [ "# ssh -i \"C:\Users\drago\Downloads\ssh-key-2022-10-25.key\" ubuntu@150.136.137.227\n", "# scp -i \"C:\Users\drago\Downloads\ssh-key-2022-10-25.key\" ubuntu@150.136.137.227:/home/ubuntu/projectpathing/updatedCheckpoints C:\Users\drago\Downloads\n", "# test has exactly 100 tokens, so test and test[-100:] print identically\n", "print(test)\n", "print(test[-100:])" ] },
{ "cell_type": "code", "execution_count": 14, "id": "2b570e5a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(772051, 2)\n" ] } ], "source": [ "print(y.shape)" ] },
{ "cell_type": "code", "execution_count": 36, "id": "0216d1c5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: \"sequential_17\"\n", "_________________________________________________________________\n", " Layer (type) Output Shape Param # \n", "=================================================================\n", " dense_105 (Dense) (None, 100) 10100 \n", " \n", " dense_106 (Dense) (None, 100) 10100 \n", " \n", " dense_107 (Dense) (None, 85) 8585 \n", " \n", " dense_108 (Dense) (None, 75) 6450 \n", " \n", " dense_109 (Dense) (None, 65) 4940 \n", " \n", " dense_110 (Dense) (None, 35) 2310 \n", " \n", " dense_111 (Dense) (None, 2) 72 \n", " \n", "=================================================================\n", "Total params: 42,557\n", "Trainable params: 42,557\n", "Non-trainable params: 0\n", "_________________________________________________________________\n" ] } ], "source": [ "model.summary()" ] },
{ "cell_type": "code", "execution_count": 8, "id": "041a1f27", "metadata": {}, "outputs": [], "source": [ "def get_model_memory_usage(batch_size, model):\n", "    # Rough estimate of the model's memory footprint (activations + weights) in GiB.\n", "    import numpy as np\n", "    try:\n", "        from keras import backend as K\n", "    except ImportError:\n", "        from tensorflow.keras import backend as K\n", "\n", "    shapes_mem_count = 0\n", "    internal_model_mem_count = 0\n", "    for l in model.layers:\n", "        layer_type = l.__class__.__name__\n", "        if layer_type == 'Model':\n", "            internal_model_mem_count += get_model_memory_usage(batch_size, l)\n", "        single_layer_mem = 1\n", "        out_shape = l.output_shape\n", "        if type(out_shape) is list:\n", "            out_shape = out_shape[0]\n", "        for s in out_shape:\n", "            if s is None:\n", "                continue\n", "            single_layer_mem *= s\n", "        shapes_mem_count += single_layer_mem\n", "\n", "    trainable_count = np.sum([K.count_params(p) for p in model.trainable_weights])\n", "    non_trainable_count = np.sum([K.count_params(p) for p in model.non_trainable_weights])\n", "\n", "    number_size = 4.0\n", "    if K.floatx() == 'float16':\n", "        number_size = 2.0\n", "    if K.floatx() == 'float64':\n", "        number_size = 8.0\n", "\n", "    total_memory = number_size * (batch_size * shapes_mem_count + trainable_count + non_trainable_count)\n", "    gbytes = np.round(total_memory / (1024.0 ** 3), 3) + internal_model_mem_count\n", "    return gbytes" ] },
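{ "cell_type": "markdown", "id": "7c8d9e0f", "metadata": {}, "source": [ "Example use of the helper above (a sketch, not executed in the original run): estimate the footprint of the loaded model at the batch size of 128 used during training.\n" ] },
{ "cell_type": "code", "execution_count": null, "id": "8d9e0f1a", "metadata": {}, "outputs": [], "source": [ "# Estimated GiB for activations at batch size 128 plus the model's weights.\n", "print(get_model_memory_usage(128, model))" ] }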
single_layer_mem\n", "\n", " trainable_count = np.sum([K.count_params(p) for p in model.trainable_weights])\n", " non_trainable_count = np.sum([K.count_params(p) for p in model.non_trainable_weights])\n", "\n", " number_size = 4.0\n", " if K.floatx() == 'float16':\n", " number_size = 2.0\n", " if K.floatx() == 'float64':\n", " number_size = 8.0\n", "\n", " total_memory = number_size * (batch_size * shapes_mem_count + trainable_count + non_trainable_count)\n", " gbytes = np.round(total_memory / (1024.0 ** 3), 3) + internal_model_mem_count\n", " return gbytes" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" } }, "nbformat": 4, "nbformat_minor": 5 }