{ "cells": [ { "cell_type": "code", "execution_count": 2, "id": "c8ba5338", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import keras\n", "import tensorflow as tf\n", "from tensorflow import keras\n", "from tensorflow.keras import layers\n", "import datetime\n", "from packaging import version\n", "\n", "\n", "from enum import Enum\n", "\n", "import os\n", "import gc" ] }, { "cell_type": "code", "execution_count": 4, "id": "1ef43cba", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "TensorFlow version: 2.9.1\n", "Found GPU at: /device:GPU:0\n" ] } ], "source": [ "print(\"TensorFlow version: \", tf.__version__)\n", "device_name = tf.test.gpu_device_name()\n", "if not device_name:\n", " raise SystemError('GPU device not found')\n", "print('Found GPU at: {}'.format(device_name))\n" ] }, { "cell_type": "code", "execution_count": 8, "id": "27a42a63", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "x_0.npy\n", "x_1.npy\n", "x_10.npy\n", "x_100.npy\n", "x_101.npy\n", "x_102.npy\n", "x_103.npy\n", "x_104.npy\n", "x_105.npy\n", "x_106.npy\n", "x_107.npy\n", "x_108.npy\n", "x_109.npy\n", "x_11.npy\n", "x_110.npy\n", "x_111.npy\n", "x_112.npy\n", "x_113.npy\n", "x_114.npy\n", "x_115.npy\n", "x_116.npy\n", "x_117.npy\n", "x_118.npy\n", "x_119.npy\n", "x_12.npy\n", "x_120.npy\n", "x_121.npy\n", "x_122.npy\n", "x_123.npy\n", "x_124.npy\n", "x_125.npy\n", "x_126.npy\n", "x_127.npy\n", "x_128.npy\n", "x_129.npy\n", "x_13.npy\n", "x_130.npy\n", "x_131.npy\n", "x_132.npy\n", "x_133.npy\n", "x_134.npy\n", "x_135.npy\n", "x_136.npy\n", "x_137.npy\n", "x_138.npy\n", "x_139.npy\n", "x_14.npy\n", "x_140.npy\n", "x_141.npy\n", "x_142.npy\n", "x_143.npy\n", "x_144.npy\n", "x_145.npy\n", "x_146.npy\n", "x_147.npy\n", "x_148.npy\n", "x_149.npy\n", "x_15.npy\n", "x_150.npy\n", "x_151.npy\n", "x_152.npy\n", "x_153.npy\n", "x_154.npy\n", "x_155.npy\n", "x_156.npy\n", "x_157.npy\n", "x_158.npy\n", "x_159.npy\n", "x_16.npy\n", "x_160.npy\n", "x_161.npy\n", "x_162.npy\n", "x_163.npy\n", "x_164.npy\n", "x_165.npy\n", "x_166.npy\n", "x_167.npy\n", "x_168.npy\n", "x_169.npy\n", "x_17.npy\n", "x_170.npy\n", "x_171.npy\n", "x_172.npy\n", "x_173.npy\n", "x_174.npy\n", "x_175.npy\n", "x_176.npy\n", "x_177.npy\n", "x_178.npy\n", "x_179.npy\n", "x_18.npy\n", "x_180.npy\n", "x_181.npy\n", "x_182.npy\n", "x_183.npy\n", "x_184.npy\n", "x_185.npy\n", "x_186.npy\n", "x_187.npy\n", "x_188.npy\n", "x_189.npy\n", "x_19.npy\n", "x_190.npy\n", "x_191.npy\n", "x_192.npy\n", "x_193.npy\n", "x_194.npy\n", "x_195.npy\n", "x_196.npy\n", "x_197.npy\n", "x_198.npy\n", "x_199.npy\n", "x_2.npy\n", "x_20.npy\n", "x_200.npy\n", "x_201.npy\n", "x_202.npy\n", "x_203.npy\n", "x_204.npy\n", "x_205.npy\n", "x_206.npy\n", "x_207.npy\n", "x_208.npy\n", "x_209.npy\n", "x_21.npy\n", "x_210.npy\n", "x_211.npy\n", "x_212.npy\n", "x_213.npy\n", "x_214.npy\n", "x_215.npy\n", "x_216.npy\n", "x_217.npy\n", "x_218.npy\n", "x_219.npy\n", "x_22.npy\n", "x_220.npy\n", "x_23.npy\n", "x_24.npy\n", "x_25.npy\n", "x_26.npy\n", "x_27.npy\n", "x_28.npy\n", "x_29.npy\n", "x_3.npy\n", "x_30.npy\n", "x_31.npy\n", "x_32.npy\n", "x_33.npy\n", "x_34.npy\n", "x_35.npy\n", "x_36.npy\n", "x_37.npy\n", "x_38.npy\n", "x_39.npy\n", "x_4.npy\n", "x_40.npy\n", "x_41.npy\n", "x_42.npy\n", "x_43.npy\n", "x_44.npy\n", "x_45.npy\n", "x_46.npy\n", "x_47.npy\n", "x_48.npy\n", "x_49.npy\n", "x_5.npy\n", "x_50.npy\n", "x_51.npy\n", "x_52.npy\n", "x_53.npy\n", "x_54.npy\n", "x_55.npy\n", "x_56.npy\n", 
"x_57.npy\n", "x_58.npy\n", "x_59.npy\n", "x_6.npy\n", "x_60.npy\n", "x_61.npy\n", "x_62.npy\n", "x_63.npy\n", "x_64.npy\n", "x_65.npy\n", "x_66.npy\n", "x_67.npy\n", "x_68.npy\n", "x_69.npy\n", "x_7.npy\n", "x_70.npy\n", "x_71.npy\n", "x_72.npy\n", "x_73.npy\n", "x_74.npy\n", "x_75.npy\n", "x_76.npy\n", "x_77.npy\n", "x_78.npy\n", "x_79.npy\n", "x_8.npy\n", "x_80.npy\n", "x_81.npy\n", "x_82.npy\n", "x_83.npy\n", "x_84.npy\n", "x_85.npy\n", "x_86.npy\n", "x_87.npy\n", "x_88.npy\n", "x_89.npy\n", "x_9.npy\n", "x_90.npy\n", "x_91.npy\n", "x_92.npy\n", "x_93.npy\n", "x_94.npy\n", "x_95.npy\n", "x_96.npy\n", "x_97.npy\n", "x_98.npy\n", "x_99.npy\n", "y_0.npy\n", "y_1.npy\n", "y_10.npy\n", "y_100.npy\n", "y_101.npy\n", "y_102.npy\n", "y_103.npy\n", "y_104.npy\n", "y_105.npy\n", "y_106.npy\n", "y_107.npy\n", "y_108.npy\n", "y_109.npy\n", "y_11.npy\n", "y_110.npy\n", "y_111.npy\n", "y_112.npy\n", "y_113.npy\n", "y_114.npy\n", "y_115.npy\n", "y_116.npy\n", "y_117.npy\n", "y_118.npy\n", "y_119.npy\n", "y_12.npy\n", "y_120.npy\n", "y_121.npy\n", "y_122.npy\n", "y_123.npy\n", "y_124.npy\n", "y_125.npy\n", "y_126.npy\n", "y_127.npy\n", "y_128.npy\n", "y_129.npy\n", "y_13.npy\n", "y_130.npy\n", "y_131.npy\n", "y_132.npy\n", "y_133.npy\n", "y_134.npy\n", "y_135.npy\n", "y_136.npy\n", "y_137.npy\n", "y_138.npy\n", "y_139.npy\n", "y_14.npy\n", "y_140.npy\n", "y_141.npy\n", "y_142.npy\n", "y_143.npy\n", "y_144.npy\n", "y_145.npy\n", "y_146.npy\n", "y_147.npy\n", "y_148.npy\n", "y_149.npy\n", "y_15.npy\n", "y_150.npy\n", "y_151.npy\n", "y_152.npy\n", "y_153.npy\n", "y_154.npy\n", "y_155.npy\n", "y_156.npy\n", "y_157.npy\n", "y_158.npy\n", "y_159.npy\n", "y_16.npy\n", "y_160.npy\n", "y_161.npy\n", "y_162.npy\n", "y_163.npy\n", "y_164.npy\n", "y_165.npy\n", "y_166.npy\n", "y_167.npy\n", "y_168.npy\n", "y_169.npy\n", "y_17.npy\n", "y_170.npy\n", "y_171.npy\n", "y_172.npy\n", "y_173.npy\n", "y_174.npy\n", "y_175.npy\n", "y_176.npy\n", "y_177.npy\n", "y_178.npy\n", "y_179.npy\n", "y_18.npy\n", "y_180.npy\n", "y_181.npy\n", "y_182.npy\n", "y_183.npy\n", "y_184.npy\n", "y_185.npy\n", "y_186.npy\n", "y_187.npy\n", "y_188.npy\n", "y_189.npy\n", "y_19.npy\n", "y_190.npy\n", "y_191.npy\n", "y_192.npy\n", "y_193.npy\n", "y_194.npy\n", "y_195.npy\n", "y_196.npy\n", "y_197.npy\n", "y_198.npy\n", "y_199.npy\n", "y_2.npy\n", "y_20.npy\n", "y_200.npy\n", "y_201.npy\n", "y_202.npy\n", "y_203.npy\n", "y_204.npy\n", "y_205.npy\n", "y_206.npy\n", "y_207.npy\n", "y_208.npy\n", "y_209.npy\n", "y_21.npy\n", "y_210.npy\n", "y_211.npy\n", "y_212.npy\n", "y_213.npy\n", "y_214.npy\n", "y_215.npy\n", "y_216.npy\n", "y_217.npy\n", "y_218.npy\n", "y_219.npy\n", "y_22.npy\n", "y_220.npy\n", "y_23.npy\n", "y_24.npy\n", "y_25.npy\n", "y_26.npy\n", "y_27.npy\n", "y_28.npy\n", "y_29.npy\n", "y_3.npy\n", "y_30.npy\n", "y_31.npy\n", "y_32.npy\n", "y_33.npy\n", "y_34.npy\n", "y_35.npy\n", "y_36.npy\n", "y_37.npy\n", "y_38.npy\n", "y_39.npy\n", "y_4.npy\n", "y_40.npy\n", "y_41.npy\n", "y_42.npy\n", "y_43.npy\n", "y_44.npy\n", "y_45.npy\n", "y_46.npy\n", "y_47.npy\n", "y_48.npy\n", "y_49.npy\n", "y_5.npy\n", "y_50.npy\n", "y_51.npy\n", "y_52.npy\n", "y_53.npy\n", "y_54.npy\n", "y_55.npy\n", "y_56.npy\n", "y_57.npy\n", "y_58.npy\n", "y_59.npy\n", "y_6.npy\n", "y_60.npy\n", "y_61.npy\n", "y_62.npy\n", "y_63.npy\n", "y_64.npy\n", "y_65.npy\n", "y_66.npy\n", "y_67.npy\n", "y_68.npy\n", "y_69.npy\n", "y_7.npy\n", "y_70.npy\n", "y_71.npy\n", "y_72.npy\n", "y_73.npy\n", "y_74.npy\n", "y_75.npy\n", "y_76.npy\n", "y_77.npy\n", "y_78.npy\n", "y_79.npy\n", 
"y_8.npy\n", "y_80.npy\n", "y_81.npy\n", "y_82.npy\n", "y_83.npy\n", "y_84.npy\n", "y_85.npy\n", "y_86.npy\n", "y_87.npy\n", "y_88.npy\n", "y_89.npy\n", "y_9.npy\n", "y_90.npy\n", "y_91.npy\n", "y_92.npy\n", "y_93.npy\n", "y_94.npy\n", "y_95.npy\n", "y_96.npy\n", "y_97.npy\n", "y_98.npy\n", "y_99.npy\n" ] } ], "source": [ "data_path = \"data\"\n", "for file in os.listdir(data_path):\n", " print(file)\n" ] }, { "cell_type": "code", "execution_count": 2, "id": "18b502b1", "metadata": {}, "outputs": [], "source": [ "n_classes = 2\n", "def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):\n", " # Normalization and Attention\n", " x = layers.LayerNormalization(epsilon=1e-6)(inputs)\n", " x = layers.MultiHeadAttention(\n", " key_dim=head_size, num_heads=num_heads, dropout=dropout\n", " )(x, x)\n", " x = layers.Dropout(dropout)(x)\n", " res = x + inputs\n", "\n", " # Feed Forward Part\n", " x = layers.LayerNormalization(epsilon=1e-6)(res)\n", " x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation=\"relu\")(x)\n", " x = layers.Dropout(dropout)(x)\n", " x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)\n", " return x + res\n", "\n", "def build_model(\n", " input_shape,\n", " head_size,\n", " num_heads,\n", " ff_dim,\n", " num_transformer_blocks,\n", " mlp_units,\n", " dropout=0,\n", " mlp_dropout=0,\n", "):\n", " inputs = keras.Input(shape=input_shape)\n", " x = inputs\n", " for _ in range(num_transformer_blocks):\n", " x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)\n", "\n", " x = layers.GlobalAveragePooling1D(data_format=\"channels_first\")(x)\n", " for dim in mlp_units:\n", " x = layers.Dense(dim, activation=\"relu\")(x)\n", " x = layers.Dropout(mlp_dropout)(x)\n", " #changed from softmax to relu\n", " outputs = layers.Dense(n_classes, activation=\"relu\")(x)\n", " return keras.Model(inputs, outputs)" ] }, { "cell_type": "code", "execution_count": 3, "id": "a9ce858a", "metadata": {}, "outputs": [], "source": [ "# optional tensorboard training metrics\n", "log_dir = \"logs/fit/\"\n", "tensorboard_callback = tboard_callback = tf.keras.callbacks.TensorBoard(log_dir = log_dir,\n", " histogram_freq = 1,\n", " profile_batch = '500,520')" ] }, { "cell_type": "code", "execution_count": 8, "id": "0b4c42e4", "metadata": {}, "outputs": [], "source": [ "model = build_model((100,1), \n", " head_size=25,\n", " num_heads=4,\n", " ff_dim=4,\n", " num_transformer_blocks=6,\n", " mlp_units=[80,50,30], #\n", " mlp_dropout=0,\n", " dropout=0)\n", "model.compile(\n", " loss=\"mae\",\n", " optimizer=keras.optimizers.Adam(learning_rate=1e-4),\n", " metrics=[\"mae\"],\n", ")" ] }, { "cell_type": "code", "execution_count": 2, "id": "22a03f0d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: \"model\"\n", "__________________________________________________________________________________________________\n", " Layer (type) Output Shape Param # Connected to \n", "==================================================================================================\n", " input_1 (InputLayer) [(None, 100, 1)] 0 [] \n", " \n", " layer_normalization (LayerNorm (None, 100, 1) 2 ['input_1[0][0]'] \n", " alization) \n", " \n", " multi_head_attention (MultiHea (None, 100, 1) 701 ['layer_normalization[0][0]', \n", " dAttention) 'layer_normalization[0][0]'] \n", " \n", " dropout (Dropout) (None, 100, 1) 0 ['multi_head_attention[0][0]'] \n", " \n", " tf.__operators__.add (TFOpLamb (None, 100, 1) 0 ['dropout[0][0]', \n", " da) 
'input_1[0][0]'] \n", " \n", " layer_normalization_1 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add[0][0]'] \n", " rmalization) \n", " \n", " conv1d (Conv1D) (None, 100, 4) 8 ['layer_normalization_1[0][0]'] \n", " \n", " dropout_1 (Dropout) (None, 100, 4) 0 ['conv1d[0][0]'] \n", " \n", " conv1d_1 (Conv1D) (None, 100, 1) 5 ['dropout_1[0][0]'] \n", " \n", " tf.__operators__.add_1 (TFOpLa (None, 100, 1) 0 ['conv1d_1[0][0]', \n", " mbda) 'tf.__operators__.add[0][0]'] \n", " \n", " layer_normalization_2 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_1[0][0]'] \n", " rmalization) \n", " \n", " multi_head_attention_1 (MultiH (None, 100, 1) 701 ['layer_normalization_2[0][0]', \n", " eadAttention) 'layer_normalization_2[0][0]'] \n", " \n", " dropout_2 (Dropout) (None, 100, 1) 0 ['multi_head_attention_1[0][0]'] \n", " \n", " tf.__operators__.add_2 (TFOpLa (None, 100, 1) 0 ['dropout_2[0][0]', \n", " mbda) 'tf.__operators__.add_1[0][0]'] \n", " \n", " layer_normalization_3 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_2[0][0]'] \n", " rmalization) \n", " \n", " conv1d_2 (Conv1D) (None, 100, 4) 8 ['layer_normalization_3[0][0]'] \n", " \n", " dropout_3 (Dropout) (None, 100, 4) 0 ['conv1d_2[0][0]'] \n", " \n", " conv1d_3 (Conv1D) (None, 100, 1) 5 ['dropout_3[0][0]'] \n", " \n", " tf.__operators__.add_3 (TFOpLa (None, 100, 1) 0 ['conv1d_3[0][0]', \n", " mbda) 'tf.__operators__.add_2[0][0]'] \n", " \n", " layer_normalization_4 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_3[0][0]'] \n", " rmalization) \n", " \n", " multi_head_attention_2 (MultiH (None, 100, 1) 701 ['layer_normalization_4[0][0]', \n", " eadAttention) 'layer_normalization_4[0][0]'] \n", " \n", " dropout_4 (Dropout) (None, 100, 1) 0 ['multi_head_attention_2[0][0]'] \n", " \n", " tf.__operators__.add_4 (TFOpLa (None, 100, 1) 0 ['dropout_4[0][0]', \n", " mbda) 'tf.__operators__.add_3[0][0]'] \n", " \n", " layer_normalization_5 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_4[0][0]'] \n", " rmalization) \n", " \n", " conv1d_4 (Conv1D) (None, 100, 4) 8 ['layer_normalization_5[0][0]'] \n", " \n", " dropout_5 (Dropout) (None, 100, 4) 0 ['conv1d_4[0][0]'] \n", " \n", " conv1d_5 (Conv1D) (None, 100, 1) 5 ['dropout_5[0][0]'] \n", " \n", " tf.__operators__.add_5 (TFOpLa (None, 100, 1) 0 ['conv1d_5[0][0]', \n", " mbda) 'tf.__operators__.add_4[0][0]'] \n", " \n", " layer_normalization_6 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_5[0][0]'] \n", " rmalization) \n", " \n", " multi_head_attention_3 (MultiH (None, 100, 1) 701 ['layer_normalization_6[0][0]', \n", " eadAttention) 'layer_normalization_6[0][0]'] \n", " \n", " dropout_6 (Dropout) (None, 100, 1) 0 ['multi_head_attention_3[0][0]'] \n", " \n", " tf.__operators__.add_6 (TFOpLa (None, 100, 1) 0 ['dropout_6[0][0]', \n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " mbda) 'tf.__operators__.add_5[0][0]'] \n", " \n", " layer_normalization_7 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_6[0][0]'] \n", " rmalization) \n", " \n", " conv1d_6 (Conv1D) (None, 100, 4) 8 ['layer_normalization_7[0][0]'] \n", " \n", " dropout_7 (Dropout) (None, 100, 4) 0 ['conv1d_6[0][0]'] \n", " \n", " conv1d_7 (Conv1D) (None, 100, 1) 5 ['dropout_7[0][0]'] \n", " \n", " tf.__operators__.add_7 (TFOpLa (None, 100, 1) 0 ['conv1d_7[0][0]', \n", " mbda) 'tf.__operators__.add_6[0][0]'] \n", " \n", " layer_normalization_8 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_7[0][0]'] \n", " rmalization) \n", " \n", " multi_head_attention_4 (MultiH (None, 100, 1) 701 ['layer_normalization_8[0][0]', \n", " 
eadAttention) 'layer_normalization_8[0][0]'] \n", " \n", " dropout_8 (Dropout) (None, 100, 1) 0 ['multi_head_attention_4[0][0]'] \n", " \n", " tf.__operators__.add_8 (TFOpLa (None, 100, 1) 0 ['dropout_8[0][0]', \n", " mbda) 'tf.__operators__.add_7[0][0]'] \n", " \n", " layer_normalization_9 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_8[0][0]'] \n", " rmalization) \n", " \n", " conv1d_8 (Conv1D) (None, 100, 4) 8 ['layer_normalization_9[0][0]'] \n", " \n", " dropout_9 (Dropout) (None, 100, 4) 0 ['conv1d_8[0][0]'] \n", " \n", " conv1d_9 (Conv1D) (None, 100, 1) 5 ['dropout_9[0][0]'] \n", " \n", " tf.__operators__.add_9 (TFOpLa (None, 100, 1) 0 ['conv1d_9[0][0]', \n", " mbda) 'tf.__operators__.add_8[0][0]'] \n", " \n", " layer_normalization_10 (LayerN (None, 100, 1) 2 ['tf.__operators__.add_9[0][0]'] \n", " ormalization) \n", " \n", " multi_head_attention_5 (MultiH (None, 100, 1) 701 ['layer_normalization_10[0][0]', \n", " eadAttention) 'layer_normalization_10[0][0]'] \n", " \n", " dropout_10 (Dropout) (None, 100, 1) 0 ['multi_head_attention_5[0][0]'] \n", " \n", " tf.__operators__.add_10 (TFOpL (None, 100, 1) 0 ['dropout_10[0][0]', \n", " ambda) 'tf.__operators__.add_9[0][0]'] \n", " \n", " layer_normalization_11 (LayerN (None, 100, 1) 2 ['tf.__operators__.add_10[0][0]']\n", " ormalization) \n", " \n", " conv1d_10 (Conv1D) (None, 100, 4) 8 ['layer_normalization_11[0][0]'] \n", " \n", " dropout_11 (Dropout) (None, 100, 4) 0 ['conv1d_10[0][0]'] \n", " \n", " conv1d_11 (Conv1D) (None, 100, 1) 5 ['dropout_11[0][0]'] \n", " \n", " tf.__operators__.add_11 (TFOpL (None, 100, 1) 0 ['conv1d_11[0][0]', \n", " ambda) 'tf.__operators__.add_10[0][0]']\n", " \n", " global_average_pooling1d (Glob (None, 100) 0 ['tf.__operators__.add_11[0][0]']\n", " alAveragePooling1D) \n", " \n", " dense (Dense) (None, 80) 8080 ['global_average_pooling1d[0][0]'\n", " ] \n", " \n", " dropout_12 (Dropout) (None, 80) 0 ['dense[0][0]'] \n", " \n", " dense_1 (Dense) (None, 50) 4050 ['dropout_12[0][0]'] \n", " \n", " dropout_13 (Dropout) (None, 50) 0 ['dense_1[0][0]'] \n", " \n", " dense_2 (Dense) (None, 30) 1530 ['dropout_13[0][0]'] \n", " \n", " dropout_14 (Dropout) (None, 30) 0 ['dense_2[0][0]'] \n", " \n", " dense_3 (Dense) (None, 2) 62 ['dropout_14[0][0]'] \n", " \n", "==================================================================================================\n", "Total params: 18,030\n", "Trainable params: 18,030\n", "Non-trainable params: 0\n", "__________________________________________________________________________________________________\n", "0\n", "1\n", "2\n", "3\n", "4\n", "5\n", "6\n", "7\n", "8\n", "9\n", "(772051, 100, 1)\n", "6032/6032 [==============================] - 283s 45ms/step - loss: 0.0187 - mae: 0.0187 - val_loss: 0.0197 - val_mae: 0.0197\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "WARNING:absl:Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op while saving (showing 5 of 84). 
These functions will not be directly callable after loading.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: transformer-steven-v5\\assets\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: transformer-steven-v5\\assets\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "10\n", "11\n", "12\n", "13\n", "14\n", "15\n", "16\n", "17\n", "18\n", "19\n", "(851081, 100, 1)\n", "6650/6650 [==============================] - 295s 44ms/step - loss: 0.0178 - mae: 0.0178 - val_loss: 0.0196 - val_mae: 0.0196\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "WARNING:absl:Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op while saving (showing 5 of 84). These functions will not be directly callable after loading.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: transformer-steven-v5\\assets\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: transformer-steven-v5\\assets\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "20\n", "21\n", "22\n", "23\n", "24\n", "25\n", "26\n", "27\n", "28\n", "29\n", "(674329, 100, 1)\n", "5269/5269 [==============================] - 232s 44ms/step - loss: 0.0192 - mae: 0.0192 - val_loss: 0.0195 - val_mae: 0.0195\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "WARNING:absl:Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op while saving (showing 5 of 84). These functions will not be directly callable after loading.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: transformer-steven-v5\\assets\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: transformer-steven-v5\\assets\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "30\n", "31\n", "32\n", "33\n", "34\n", "35\n", "36\n", "37\n", "38\n", "39\n", "(811767, 100, 1)\n", "6342/6342 [==============================] - 278s 44ms/step - loss: 0.0207 - mae: 0.0207 - val_loss: 0.0200 - val_mae: 0.0200\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "WARNING:absl:Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op while saving (showing 5 of 84). 
These functions will not be directly callable after loading.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: transformer-steven-v5\\assets\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: transformer-steven-v5\\assets\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "40\n", "41\n", "42\n", "43\n", "44\n", "45\n", "46\n", "47\n", "48\n", "49\n", "(884139, 100, 1)\n", "6908/6908 [==============================] - 302s 44ms/step - loss: 0.0198 - mae: 0.0198 - val_loss: 0.0196 - val_mae: 0.0196\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "WARNING:absl:Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op while saving (showing 5 of 84). These functions will not be directly callable after loading.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: transformer-steven-v5\\assets\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: transformer-steven-v5\\assets\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "50\n", "51\n", "52\n", "53\n", "54\n", "55\n", "56\n", "57\n", "58\n", "59\n", "(1186323, 100, 1)\n", "9218/9269 [============================>.] - ETA: 2s - loss: 0.0224 - mae: 0.0224" ] }, { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "---------------------------------------------------------------------------", "KeyboardInterrupt                         Traceback (most recent call last)", "Input In [2], in <module>: model.fit(x, y, batch_size = 128, validation_data = (x_val, y_val), epochs = 1)", "File ~\\anaconda3\\lib\\site-packages\\keras\\engine\\training.py:1409, in Model.fit", "File ~\\anaconda3\\lib\\site-packages\\tensorflow\\python\\eager\\execute.py:54, in quick_execute", "KeyboardInterrupt: " ] } ], "source": [ "model = keras.models.load_model(\"transformer-steven-v5\")\n", "\n", "model.summary()\n", "\n", "# let's make the last batch the validation (files 200-220 inclusive)\n", "x_val = np.load(\"data/x_200.npy\")\n", "y_val = np.load(\"data/y_200.npy\")\n", "for g in range(201, 221):\n", "    a = np.load(\"data/x_\" + str(g) + \".npy\")\n", "    b = np.load(\"data/y_\" + str(g) + \".npy\")\n", "    x_val = np.concatenate((x_val, a))\n", "    y_val = np.concatenate((y_val, b))\n", "x_val = x_val.reshape((x_val.shape[0], x_val.shape[1], 1))\n", "y_val = y_val.reshape((y_val.shape[0], y_val.shape[1], 1))\n", "\n", "for k in range(50):  # passes over the whole training set\n", "    for i in range(20):  # 20 groups of 10 training files (0-199)\n", "        x = None\n", "        y = None\n", "        gc.collect()\n", "        for j in range(10):\n", "            print(10 * i + j)\n", "            new_x = np.load(\"data/x_\" + str(10 * i + j) + \".npy\")\n", "            new_y = np.load(\"data/y_\" + str(10 * i + j) + \".npy\")\n", "            if x is None and y is None:\n", "                x = new_x\n", "                y = new_y\n", "            else:\n", "                x = np.concatenate((x, new_x))\n", "                y = np.concatenate((y, new_y))\n", "        # this will need to be optimized\n", "        x = x.reshape((x.shape[0], x.shape[1], 1))\n", "        y = y.reshape((y.shape[0], y.shape[1], 1))\n", "        print(x.shape)\n", "        model.fit(x, y, batch_size = 128, validation_data = (x_val, y_val), epochs = 1)\n", "        model.save(\"transformer-steven-v5\")\n", "        # call evaluate/save with/without improvement\n", "        # model optimization and look at other papers for inspiration" ] },
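{ "cell_type": "markdown", "id": "0a1b2c3d", "metadata": {}, "source": [ "The loop above reloads and re-concatenates ten `.npy` shards per group, which is the part flagged `# this will need to be optimized`. One possible alternative (a sketch under assumptions, not what was run: `shard_generator` and `train_ds` are illustrative names, and the shard arrays are assumed castable to float32) is to stream the shards through `tf.data` so only one shard sits in memory at a time:\n" ] },
{ "cell_type": "code", "execution_count": null, "id": "1b2c3d4e", "metadata": {}, "outputs": [], "source": [ "# Sketch: stream the data/x_<i>.npy, data/y_<i>.npy shards instead of concatenating.\n", "def shard_generator(ids):\n", "    for i in ids:\n", "        x = np.load(\"data/x_\" + str(i) + \".npy\").astype(np.float32).reshape(-1, 100, 1)\n", "        y = np.load(\"data/y_\" + str(i) + \".npy\").astype(np.float32)\n", "        yield x, y\n", "\n", "train_ds = (\n", "    tf.data.Dataset.from_generator(\n", "        lambda: shard_generator(range(200)),\n", "        output_signature=(\n", "            tf.TensorSpec(shape=(None, 100, 1), dtype=tf.float32),\n", "            tf.TensorSpec(shape=(None, 2), dtype=tf.float32),\n", "        ),\n", "    )\n", "    .unbatch()\n", "    .batch(128)\n", "    .prefetch(tf.data.AUTOTUNE)\n", ")\n", "# model.fit(train_ds, validation_data=(x_val, y_val), epochs=1)" ] },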
{ "cell_type": "markdown", "id": "e66455f3", "metadata": {}, "source": [ "\n" ] },
{ "cell_type": "code", "execution_count": 11, "id": "53636383", "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The tensorboard extension is already loaded. To reload it, use:\n", " %reload_ext tensorboard\n" ] }, { "data": { "text/plain": [ "Reusing TensorBoard on port 6006 (pid 13352), started 0:33:47 ago. 
(Use '!kill 13352' to kill it.)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", " \n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%load_ext tensorboard\n", "%tensorboard --logdir logs/fit" ] }, { "cell_type": "code", "execution_count": 6, "id": "bf14db9b", "metadata": {}, "outputs": [ { "ename": "SyntaxError", "evalue": "invalid syntax (2121200483.py, line 1)", "output_type": "error", "traceback": [ "\u001b[1;36m Input \u001b[1;32mIn [6]\u001b[1;36m\u001b[0m\n\u001b[1;33m tensorboard dev upload --logdir \\\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n" ] } ], "source": [ "#tensorboard dev upload --logdir \\\n", "# 'tensorboard'" ] }, { "cell_type": "code", "execution_count": 4, "id": "d6800f8c", "metadata": {}, "outputs": [], "source": [ "from musicautobot.music_transformer.transform import idxenc2stream\n", "from musicautobot.vocab import MusicVocab" ] }, { "cell_type": "code", "execution_count": 3, "id": "7f4f8033", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: \"model\"\n", "__________________________________________________________________________________________________\n", " Layer (type) Output Shape Param # Connected to \n", "==================================================================================================\n", " input_1 (InputLayer) [(None, 100, 1)] 0 [] \n", " \n", " layer_normalization (LayerNorm (None, 100, 1) 2 ['input_1[0][0]'] \n", " alization) \n", " \n", " multi_head_attention (MultiHea (None, 100, 1) 701 ['layer_normalization[0][0]', \n", " dAttention) 'layer_normalization[0][0]'] \n", " \n", " dropout (Dropout) (None, 100, 1) 0 ['multi_head_attention[0][0]'] \n", " \n", " tf.__operators__.add (TFOpLamb (None, 100, 1) 0 ['dropout[0][0]', \n", " da) 'input_1[0][0]'] \n", " \n", " layer_normalization_1 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add[0][0]'] \n", " rmalization) \n", " \n", " conv1d (Conv1D) (None, 100, 4) 8 ['layer_normalization_1[0][0]'] \n", " \n", " dropout_1 (Dropout) (None, 100, 4) 0 ['conv1d[0][0]'] \n", " \n", " conv1d_1 (Conv1D) (None, 100, 1) 5 ['dropout_1[0][0]'] \n", " \n", " tf.__operators__.add_1 (TFOpLa (None, 100, 1) 0 ['conv1d_1[0][0]', \n", " mbda) 'tf.__operators__.add[0][0]'] \n", " \n", " layer_normalization_2 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_1[0][0]'] \n", " rmalization) \n", " \n", " multi_head_attention_1 (MultiH (None, 100, 1) 701 ['layer_normalization_2[0][0]', \n", " eadAttention) 'layer_normalization_2[0][0]'] \n", " \n", " dropout_2 (Dropout) (None, 100, 1) 0 ['multi_head_attention_1[0][0]'] \n", " \n", " tf.__operators__.add_2 (TFOpLa (None, 100, 1) 0 ['dropout_2[0][0]', \n", " mbda) 'tf.__operators__.add_1[0][0]'] \n", " \n", " layer_normalization_3 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_2[0][0]'] \n", " rmalization) \n", " \n", " conv1d_2 (Conv1D) (None, 100, 4) 8 ['layer_normalization_3[0][0]'] \n", " \n", " dropout_3 (Dropout) (None, 100, 4) 0 ['conv1d_2[0][0]'] \n", " \n", " conv1d_3 (Conv1D) (None, 100, 1) 5 ['dropout_3[0][0]'] \n", " \n", " tf.__operators__.add_3 (TFOpLa (None, 100, 1) 0 ['conv1d_3[0][0]', \n", " mbda) 'tf.__operators__.add_2[0][0]'] \n", " \n", " layer_normalization_4 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_3[0][0]'] \n", " rmalization) \n", " \n", " multi_head_attention_2 (MultiH (None, 100, 1) 701 ['layer_normalization_4[0][0]', \n", " eadAttention) 
'layer_normalization_4[0][0]'] \n", " \n", " dropout_4 (Dropout) (None, 100, 1) 0 ['multi_head_attention_2[0][0]'] \n", " \n", " tf.__operators__.add_4 (TFOpLa (None, 100, 1) 0 ['dropout_4[0][0]', \n", " mbda) 'tf.__operators__.add_3[0][0]'] \n", " \n", " layer_normalization_5 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_4[0][0]'] \n", " rmalization) \n", " \n", " conv1d_4 (Conv1D) (None, 100, 4) 8 ['layer_normalization_5[0][0]'] \n", " \n", " dropout_5 (Dropout) (None, 100, 4) 0 ['conv1d_4[0][0]'] \n", " \n", " conv1d_5 (Conv1D) (None, 100, 1) 5 ['dropout_5[0][0]'] \n", " \n", " tf.__operators__.add_5 (TFOpLa (None, 100, 1) 0 ['conv1d_5[0][0]', \n", " mbda) 'tf.__operators__.add_4[0][0]'] \n", " \n", " layer_normalization_6 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_5[0][0]'] \n", " rmalization) \n", " \n", " multi_head_attention_3 (MultiH (None, 100, 1) 701 ['layer_normalization_6[0][0]', \n", " eadAttention) 'layer_normalization_6[0][0]'] \n", " \n", " dropout_6 (Dropout) (None, 100, 1) 0 ['multi_head_attention_3[0][0]'] \n", " \n", " tf.__operators__.add_6 (TFOpLa (None, 100, 1) 0 ['dropout_6[0][0]', \n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " mbda) 'tf.__operators__.add_5[0][0]'] \n", " \n", " layer_normalization_7 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_6[0][0]'] \n", " rmalization) \n", " \n", " conv1d_6 (Conv1D) (None, 100, 4) 8 ['layer_normalization_7[0][0]'] \n", " \n", " dropout_7 (Dropout) (None, 100, 4) 0 ['conv1d_6[0][0]'] \n", " \n", " conv1d_7 (Conv1D) (None, 100, 1) 5 ['dropout_7[0][0]'] \n", " \n", " tf.__operators__.add_7 (TFOpLa (None, 100, 1) 0 ['conv1d_7[0][0]', \n", " mbda) 'tf.__operators__.add_6[0][0]'] \n", " \n", " layer_normalization_8 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_7[0][0]'] \n", " rmalization) \n", " \n", " multi_head_attention_4 (MultiH (None, 100, 1) 701 ['layer_normalization_8[0][0]', \n", " eadAttention) 'layer_normalization_8[0][0]'] \n", " \n", " dropout_8 (Dropout) (None, 100, 1) 0 ['multi_head_attention_4[0][0]'] \n", " \n", " tf.__operators__.add_8 (TFOpLa (None, 100, 1) 0 ['dropout_8[0][0]', \n", " mbda) 'tf.__operators__.add_7[0][0]'] \n", " \n", " layer_normalization_9 (LayerNo (None, 100, 1) 2 ['tf.__operators__.add_8[0][0]'] \n", " rmalization) \n", " \n", " conv1d_8 (Conv1D) (None, 100, 4) 8 ['layer_normalization_9[0][0]'] \n", " \n", " dropout_9 (Dropout) (None, 100, 4) 0 ['conv1d_8[0][0]'] \n", " \n", " conv1d_9 (Conv1D) (None, 100, 1) 5 ['dropout_9[0][0]'] \n", " \n", " tf.__operators__.add_9 (TFOpLa (None, 100, 1) 0 ['conv1d_9[0][0]', \n", " mbda) 'tf.__operators__.add_8[0][0]'] \n", " \n", " layer_normalization_10 (LayerN (None, 100, 1) 2 ['tf.__operators__.add_9[0][0]'] \n", " ormalization) \n", " \n", " multi_head_attention_5 (MultiH (None, 100, 1) 701 ['layer_normalization_10[0][0]', \n", " eadAttention) 'layer_normalization_10[0][0]'] \n", " \n", " dropout_10 (Dropout) (None, 100, 1) 0 ['multi_head_attention_5[0][0]'] \n", " \n", " tf.__operators__.add_10 (TFOpL (None, 100, 1) 0 ['dropout_10[0][0]', \n", " ambda) 'tf.__operators__.add_9[0][0]'] \n", " \n", " layer_normalization_11 (LayerN (None, 100, 1) 2 ['tf.__operators__.add_10[0][0]']\n", " ormalization) \n", " \n", " conv1d_10 (Conv1D) (None, 100, 4) 8 ['layer_normalization_11[0][0]'] \n", " \n", " dropout_11 (Dropout) (None, 100, 4) 0 ['conv1d_10[0][0]'] \n", " \n", " conv1d_11 (Conv1D) (None, 100, 1) 5 ['dropout_11[0][0]'] \n", " \n", " tf.__operators__.add_11 (TFOpL (None, 100, 1) 0 ['conv1d_11[0][0]', \n", " ambda) 
'tf.__operators__.add_10[0][0]']\n", " \n", " global_average_pooling1d (Glob (None, 100) 0 ['tf.__operators__.add_11[0][0]']\n", " alAveragePooling1D) \n", " \n", " dense (Dense) (None, 80) 8080 ['global_average_pooling1d[0][0]'\n", " ] \n", " \n", " dropout_12 (Dropout) (None, 80) 0 ['dense[0][0]'] \n", " \n", " dense_1 (Dense) (None, 50) 4050 ['dropout_12[0][0]'] \n", " \n", " dropout_13 (Dropout) (None, 50) 0 ['dense_1[0][0]'] \n", " \n", " dense_2 (Dense) (None, 30) 1530 ['dropout_13[0][0]'] \n", " \n", " dropout_14 (Dropout) (None, 30) 0 ['dense_2[0][0]'] \n", " \n", " dense_3 (Dense) (None, 2) 62 ['dropout_14[0][0]'] \n", " \n", "==================================================================================================\n", "Total params: 18,030\n", "Trainable params: 18,030\n", "Non-trainable params: 0\n", "__________________________________________________________________________________________________\n" ] } ], "source": [ "# predict pipeline below\n", "\n", "model = keras.models.load_model(\"transformer-steven-final-v2\")\n", "model.summary()" ] },
{ "cell_type": "code", "execution_count": 5, "id": "beaacb59", "metadata": {}, "outputs": [], "source": [ "def predict_music(model, input_vector, num):\n", "    # scale raw token indices into [0, 1]; the same constant 311 rescales below\n", "    normalized = input_vector / 311\n", "    for i in range(num):\n", "        # autoregressively predict the next two values from the last 100 tokens\n", "        predict = model.predict(np.reshape(normalized[-100:], (1,100)), verbose = 0)\n", "        normalized = np.append(normalized, predict)\n", "\n", "    result = np.rint(normalized * 311)\n", "    # edits to prediction: even positions within 5 of token 8 are snapped to 8,\n", "    # odd positions are clamped to a floor of 137\n", "    for i in range(100, len(result)):\n", "        if i % 2 == 0:\n", "            if abs(result[i] - 8) < 5:\n", "                print(\"change made to \" + str(result[i]))\n", "                result[i] = 8\n", "        else:\n", "            if result[i] < 137:\n", "                print(\"bounded \" + str(result[i]))\n", "                result[i] = 137\n", "    return result\n" ] },
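{ "cell_type": "markdown", "id": "d4e5f6a7", "metadata": {}, "source": [ "The cleanup loop in `predict_music` can be vectorized with NumPy masks. The sketch below (not part of the original run; `clean_prediction` is an illustrative name) applies the same two rules without the per-element prints: even positions less than 5 away from token 8 are snapped to 8, and odd positions are clamped to at least 137.\n" ] },
{ "cell_type": "code", "execution_count": null, "id": "e5f6a7b8", "metadata": {}, "outputs": [], "source": [ "def clean_prediction(result):\n", "    # Vectorized equivalent of the cleanup loop in predict_music.\n", "    out = result.copy()\n", "    idx = np.arange(100, len(out))\n", "    even = idx[idx % 2 == 0]\n", "    odd = idx[idx % 2 == 1]\n", "    out[even[np.abs(out[even] - 8) < 5]] = 8  # snap near-8 tokens to 8\n", "    out[odd] = np.maximum(out[odd], 137)      # clamp to the 137 floor\n", "    return out" ] },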
"change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "change made to 8.0\n", "[ 8. 138. 47. 137. ... 8. 137. 45. 
137.]\n", "0 1 8 161 82 143 73 149 66 147 8 138 82 148 78 148 8 140 73 139 8 139 78 139 8 139 73 140 8 139 78 139 8 139 82 142 82 141 78 141 78 141 73 146 66 147 8 141 73 140 8 139 78 140 8 139 73 139 8 139 78 139 8 138 73 138 8 138 85 149 83 141 78 141 78 149 73 158 71 147 8 141 75 139 8 139 78 139 8 139 75 139 8 139 78 139 8 139 83 142 8 141 79 141 21 141 50 140 8 140 75 140 8 140 77 139 8 140 75 140 8 140 75 139 8 140 74 140 8 141 75 140 8 141 74 140 8 141 73 140 8 141 73 140 8 140 72 140 8 140 72 140 8 140 72 140 8 140 72 140 8 140 71 140 8 141 72 140 8 141 71 140 8 141 71 140 8 140 70 140 8 140 69 139 8 140 69 139 8 140 69 140 8 140 68 140 8 140 68 139 8 140 67 139 8 140 67 139 8 140 67 139 8 140 66 139 8 140 66 139 8 140 65 139 8 140 65 139 8 140 65 139 8 140 64 139 8 139 64 139 8 139 64 139 8 139 63 139 8 139 63 139 8 139 63 139 8 139 63 139 8 139 62 139 8 139 62 139 8 139 62 139 8 139 62 139 8 139 61 139 8 139 61 139 8 139 61 139 8 139 61 139 8 139 60 139 8 139 60 139 8 139 60 139 8 139 60 139 8 139 59 139 8 138 59 139 8 138 59 138 8 138 59 138 8 138 59 138 8 138 58 138 8 138 58 138 8 138 58 138 8 138 58 138 8 138 58 138 8 138 57 138 8 138 57 138 8 138 57 138 8 138 57 138 8 138 57 138 8 138 56 138 8 138 56 138 8 138 56 138 8 138 56 138 8 138 56 138 8 138 56 138 8 138 55 138 8 138 55 138 8 138 55 138 8 138 55 138 8 138 55 138 8 138 54 138 8 138 54 138 8 138 54 138 8 138 54 138 8 138 54 138 8 138 54 138 8 138 53 138 8 138 53 138 8 138 53 138 8 138 53 138 8 138 53 138 8 138 52 138 8 138 52 138 8 138 52 138 8 138 52 138 8 138 51 138 8 138 51 138 8 138 51 138 8 138 51 138 8 138 51 138 8 138 51 138 8 138 50 138 8 138 50 138 8 138 50 138 8 138 50 138 8 138 50 138 8 138 50 138 8 138 50 138 8 138 49 138 8 138 49 138 8 138 49 138 8 138 49 137 8 138 49 137 8 138 49 137 8 138 48 137 8 138 48 137 8 138 48 137 8 138 48 137 8 138 48 137 8 138 48 137 8 138 48 137 8 138 48 137 8 138 47 137 8 138 47 137 8 138 47 137 8 138 47 137 8 138 47 137 8 138 47 137 8 138 47 137 8 138 47 137 8 138 47 137 8 138 47 137 8 138 47 137 8 138 46 137 8 137 46 137 8 137 46 137 8 137 46 137 8 137 46 137 8 137 46 137 8 137 46 137 8 137 46 137 8 137 46 137 8 137 46 137 8 137 46 137 8 137 46 137 8 137 46 137 8 137 45 137 8 137 45 137 8 137 45 137 8 137 45 137 8 137 45 137 8 137 45 137 " ] } ], "source": [ "data = np.load(\"data.npy\", allow_pickle = True)\n", "test = data[500][0:100]\n", "def midi_predict(model, test):\n", " test_midi = idxenc2stream(test.astype(\"int\"), MusicVocab.create())\n", " test_midi.write('midi',\"input.mid\")\n", " res = predict_music(model, test, 300)\n", " print(res[-100:])\n", " output = idxenc2stream(res.astype(\"int\"), MusicVocab.create())\n", " output.write('midi',\"output.mid\")\n", " return res.astype(\"int\")\n", "array = midi_predict(model, test)\n", "for elem in array:\n", " print(elem, end = \" \")" ] }, { "cell_type": "code", "execution_count": 12, "id": "3268f624", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING:absl:Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op while saving (showing 5 of 84). 
These functions will not be directly callable after loading.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: transformer-steven-final-v2\\assets\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: transformer-steven-final-v2\\assets\n" ] } ], "source": [ "model.save(\"transformer-steven-final-v2\")" ] },
{ "cell_type": "code", "execution_count": 40, "id": "235839fe", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[ 0 1 8 153 74 140 69 140 66 140 8 138 62 138 57 142 8 139\n", " 62 139 8 138 74 139 73 138 69 139 66 139 8 139 62 138 8 138\n", " 74 141 69 142 66 139 8 138 62 138 57 144 8 138 66 138 8 138\n", " 62 141 8 138 66 138 8 138 74 140 69 138 66 139 8 138 69 138\n", " 8 139 69 138 66 138 8 138 73 140 69 142 64 140 57 144 8 139\n", " 61 140 8 139 76 140 64 140 8 138]\n", "[ 0 1 8 153 74 140 69 140 66 140 8 138 62 138 57 142 8 139\n", " 62 139 8 138 74 139 73 138 69 139 66 139 8 139 62 138 8 138\n", " 74 141 69 142 66 139 8 138 62 138 57 144 8 138 66 138 8 138\n", " 62 141 8 138 66 138 8 138 74 140 69 138 66 139 8 138 69 138\n", " 8 139 69 138 66 138 8 138 73 140 69 142 64 140 57 144 8 139\n", " 61 140 8 139 76 140 64 140 8 138]\n" ] } ], "source": [ "# ssh -i \"C:\Users\drago\Downloads\ssh-key-2022-10-25.key\" ubuntu@150.136.137.227\n", "# scp -i \"C:\Users\drago\Downloads\ssh-key-2022-10-25.key\" ubuntu@150.136.137.227:/home/ubuntu/projectpathing/updatedCheckpoints C:\Users\drago\Downloads\n", "# test has exactly 100 tokens, so test and test[-100:] print identically\n", "print(test)\n", "print(test[-100:])" ] },
{ "cell_type": "code", "execution_count": 14, "id": "2b570e5a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(772051, 2)\n" ] } ], "source": [ "print(y.shape)" ] },
{ "cell_type": "code", "execution_count": 36, "id": "0216d1c5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: \"sequential_17\"\n", "_________________________________________________________________\n", " Layer (type) Output Shape Param # \n", "=================================================================\n", " dense_105 (Dense) (None, 100) 10100 \n", " \n", " dense_106 (Dense) (None, 100) 10100 \n", " \n", " dense_107 (Dense) (None, 85) 8585 \n", " \n", " dense_108 (Dense) (None, 75) 6450 \n", " \n", " dense_109 (Dense) (None, 65) 4940 \n", " \n", " dense_110 (Dense) (None, 35) 2310 \n", " \n", " dense_111 (Dense) (None, 2) 72 \n", " \n", "=================================================================\n", "Total params: 42,557\n", "Trainable params: 42,557\n", "Non-trainable params: 0\n", "_________________________________________________________________\n" ] } ], "source": [ "model.summary()" ] },
{ "cell_type": "code", "execution_count": 8, "id": "041a1f27", "metadata": {}, "outputs": [], "source": [ "def get_model_memory_usage(batch_size, model):\n", "    # Rough estimate of the model's memory footprint (activations + weights) in GiB.\n", "    import numpy as np\n", "    try:\n", "        from keras import backend as K\n", "    except ImportError:\n", "        from tensorflow.keras import backend as K\n", "\n", "    shapes_mem_count = 0\n", "    internal_model_mem_count = 0\n", "    for l in model.layers:\n", "        layer_type = l.__class__.__name__\n", "        if layer_type == 'Model':\n", "            internal_model_mem_count += get_model_memory_usage(batch_size, l)\n", "        single_layer_mem = 1\n", "        out_shape = l.output_shape\n", "        if type(out_shape) is list:\n", "            out_shape = out_shape[0]\n", "        for s in out_shape:\n", "            if s is None:\n", "                continue\n", "            single_layer_mem *= s\n", "        shapes_mem_count += single_layer_mem\n", "\n", "    trainable_count = np.sum([K.count_params(p) for p in model.trainable_weights])\n", "    non_trainable_count = np.sum([K.count_params(p) for p in model.non_trainable_weights])\n", "\n", "    number_size = 4.0\n", "    if K.floatx() == 'float16':\n", "        number_size = 2.0\n", "    if K.floatx() == 'float64':\n", "        number_size = 8.0\n", "\n", "    total_memory = number_size * (batch_size * shapes_mem_count + trainable_count + non_trainable_count)\n", "    gbytes = np.round(total_memory / (1024.0 ** 3), 3) + internal_model_mem_count\n", "    return gbytes" ] },
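{ "cell_type": "markdown", "id": "7c8d9e0f", "metadata": {}, "source": [ "Example use of the helper above (a sketch, not executed in the original run): estimate the footprint of the loaded model at the batch size of 128 used during training.\n" ] },
{ "cell_type": "code", "execution_count": null, "id": "8d9e0f1a", "metadata": {}, "outputs": [], "source": [ "# Estimated GiB for activations at batch size 128 plus the model's weights.\n", "print(get_model_memory_usage(128, model))" ] }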
single_layer_mem\n", "\n", " trainable_count = np.sum([K.count_params(p) for p in model.trainable_weights])\n", " non_trainable_count = np.sum([K.count_params(p) for p in model.non_trainable_weights])\n", "\n", " number_size = 4.0\n", " if K.floatx() == 'float16':\n", " number_size = 2.0\n", " if K.floatx() == 'float64':\n", " number_size = 8.0\n", "\n", " total_memory = number_size * (batch_size * shapes_mem_count + trainable_count + non_trainable_count)\n", " gbytes = np.round(total_memory / (1024.0 ** 3), 3) + internal_model_mem_count\n", " return gbytes" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" } }, "nbformat": 4, "nbformat_minor": 5 }