{ "cells": [ { "cell_type": "code", "execution_count": 236, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import pickle as pkl\n", "import math\n", "import gradio as gr\n", "from tqdm.notebook import tqdm\n", "\n", "import tensorflow as tf\n", "from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input\n", "from tensorflow.keras.models import Model\n", "from tensorflow.keras.utils import load_img, img_to_array\n", "from tensorflow.keras.utils import Sequence\n", "from tensorflow.keras.preprocessing.text import Tokenizer\n", "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", "from tensorflow.keras.utils import to_categorical, plot_model\n", "from tensorflow.keras import layers\n", "from tensorflow.keras.layers import Resizing" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'2.10.0'" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tf.__version__" ] }, { "cell_type": "code", "execution_count": 237, "metadata": {}, "outputs": [], "source": [ "# Read data \n", "df = pd.read_csv('captions.txt')" ] }, { "cell_type": "code", "execution_count": 238, "metadata": {}, "outputs": [], "source": [ "def preprocess_text(text):\n", " text = 'startseq ' + text + ' endseq'\n", " return text" ] }, { "cell_type": "code", "execution_count": 239, "metadata": {}, "outputs": [], "source": [ "# Adding start and end token\n", "df['caption'] = df['caption'].apply(preprocess_text)" ] }, { "cell_type": "code", "execution_count": 240, "metadata": {}, "outputs": [], "source": [ "# Tokenizer\n", "tokenizer = Tokenizer(num_words=5000)\n", "tokenizer.fit_on_texts(df['caption'])" ] }, { "cell_type": "code", "execution_count": 241, "metadata": {}, "outputs": [], "source": [ "# Vocabulary Size\n", "vocab_size = len(tokenizer.word_index) + 1" ] }, { "cell_type": "code", "execution_count": 242, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "39" ] }, "execution_count": 242, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Max len caption\n", "max_len = max(len(i) for i in tokenizer.texts_to_sequences(df['caption']))" ] }, { "cell_type": "code", "execution_count": 243, "metadata": {}, "outputs": [], "source": [ "# Join all different list of caption into one\n", "df = df.groupby('image',as_index=False).aggregate({'caption':list})" ] }, { "cell_type": "code", "execution_count": 246, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
imagecaption
01000268201_693b08cb0e.jpg[startseq A child in a pink dress is climbing ...
11001773457_577c3a7d70.jpg[startseq A black dog and a spotted dog are fi...
21002674143_1b742ab4b8.jpg[startseq A little girl covered in paint sits ...
31003163366_44323f5815.jpg[startseq A man lays on a bench while his dog ...
41007129816_e794419615.jpg[startseq A man in an orange hat starring at s...
\n", "
" ], "text/plain": [ " image \\\n", "0 1000268201_693b08cb0e.jpg \n", "1 1001773457_577c3a7d70.jpg \n", "2 1002674143_1b742ab4b8.jpg \n", "3 1003163366_44323f5815.jpg \n", "4 1007129816_e794419615.jpg \n", "\n", " caption \n", "0 [startseq A child in a pink dress is climbing ... \n", "1 [startseq A black dog and a spotted dog are fi... \n", "2 [startseq A little girl covered in paint sits ... \n", "3 [startseq A man lays on a bench while his dog ... \n", "4 [startseq A man in an orange hat starring at s... " ] }, "execution_count": 246, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 247, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 [startseq A child in a pink dress is climbing ...\n", "1 [startseq A black dog and a spotted dog are fi...\n", "2 [startseq A little girl covered in paint sits ...\n", "3 [startseq A man lays on a bench while his dog ...\n", "4 [startseq A man in an orange hat starring at s...\n", " ... \n", "8086 [startseq A man does a wheelie on his bicycle ...\n", "8087 [startseq A group is sitting around a snowy cr...\n", "8088 [startseq A grey bird stands majestically on a...\n", "8089 [startseq A person stands near golden walls . ...\n", "8090 [startseq A man in a pink shirt climbs a rock ...\n", "Name: caption, Length: 8091, dtype: object" ] }, "execution_count": 247, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['caption']" ] }, { "cell_type": "code", "execution_count": 248, "metadata": {}, "outputs": [], "source": [ "# Train Test Split\n", "train_data = df.iloc[:6500]\n", "test = df.iloc[6500:]" ] }, { "cell_type": "code", "execution_count": 253, "metadata": {}, "outputs": [ { "ename": "ResourceExhaustedError", "evalue": "{{function_node __wrapped__Mul_device_/job:localhost/replica:0/task:0/device:GPU:0}} failed to allocate memory [Op:Mul]", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mResourceExhaustedError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[253], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m vgg_model \u001b[38;5;241m=\u001b[39m \u001b[43mVGG16\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 3\u001b[0m vgg_model\u001b[38;5;241m.\u001b[39mtrainable \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n", "File \u001b[1;32mc:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\keras\\applications\\vgg16.py:210\u001b[0m, in \u001b[0;36mVGG16\u001b[1;34m(include_top, weights, input_tensor, input_shape, pooling, classes, classifier_activation)\u001b[0m\n\u001b[0;32m 207\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m include_top:\n\u001b[0;32m 208\u001b[0m \u001b[38;5;66;03m# Classification block\u001b[39;00m\n\u001b[0;32m 209\u001b[0m x \u001b[38;5;241m=\u001b[39m layers\u001b[38;5;241m.\u001b[39mFlatten(name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mflatten\u001b[39m\u001b[38;5;124m\"\u001b[39m)(x)\n\u001b[1;32m--> 210\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[43mlayers\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDense\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m4096\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mactivation\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrelu\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfc1\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 211\u001b[0m x \u001b[38;5;241m=\u001b[39m layers\u001b[38;5;241m.\u001b[39mDense(\u001b[38;5;241m4096\u001b[39m, activation\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrelu\u001b[39m\u001b[38;5;124m\"\u001b[39m, name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfc2\u001b[39m\u001b[38;5;124m\"\u001b[39m)(x)\n\u001b[0;32m 213\u001b[0m imagenet_utils\u001b[38;5;241m.\u001b[39mvalidate_activation(classifier_activation, weights)\n", "File \u001b[1;32mc:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\keras\\utils\\traceback_utils.py:70\u001b[0m, in \u001b[0;36mfilter_traceback..error_handler\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 67\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n\u001b[0;32m 68\u001b[0m \u001b[38;5;66;03m# To get the full stack trace, call:\u001b[39;00m\n\u001b[0;32m 69\u001b[0m \u001b[38;5;66;03m# `tf.debugging.disable_traceback_filtering()`\u001b[39;00m\n\u001b[1;32m---> 70\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\u001b[38;5;241m.\u001b[39mwith_traceback(filtered_tb) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 71\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 72\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m filtered_tb\n", "File \u001b[1;32mc:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\keras\\backend.py:2100\u001b[0m, in \u001b[0;36mRandomGenerator.random_uniform\u001b[1;34m(self, shape, minval, maxval, dtype, nonce)\u001b[0m\n\u001b[0;32m 2098\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m nonce:\n\u001b[0;32m 2099\u001b[0m seed \u001b[38;5;241m=\u001b[39m tf\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mexperimental\u001b[38;5;241m.\u001b[39mstateless_fold_in(seed, nonce)\n\u001b[1;32m-> 2100\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrandom\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstateless_uniform\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 2101\u001b[0m \u001b[43m \u001b[49m\u001b[43mshape\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mshape\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2102\u001b[0m \u001b[43m \u001b[49m\u001b[43mminval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mminval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2103\u001b[0m \u001b[43m \u001b[49m\u001b[43mmaxval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmaxval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2104\u001b[0m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2105\u001b[0m \u001b[43m \u001b[49m\u001b[43mseed\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mseed\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2106\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2107\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m tf\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39muniform(\n\u001b[0;32m 2108\u001b[0m shape\u001b[38;5;241m=\u001b[39mshape,\n\u001b[0;32m 2109\u001b[0m minval\u001b[38;5;241m=\u001b[39mminval,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 2112\u001b[0m seed\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmake_legacy_seed(),\n\u001b[0;32m 2113\u001b[0m )\n", "\u001b[1;31mResourceExhaustedError\u001b[0m: {{function_node __wrapped__Mul_device_/job:localhost/replica:0/task:0/device:GPU:0}} failed to allocate memory [Op:Mul]" ] } ], "source": [ "# Image Feauture extraction model\n", "vgg_model = VGG16()\n", "vgg_model.trainable = False\n", "img_model = Model(inputs = vgg_model.input,outputs=vgg_model.layers[-2].output)" ] }, { "cell_type": "code", "execution_count": 127, "metadata": {}, "outputs": [], "source": [ "target_shape = (224, 224)\n", "batch_size = 32" ] }, { "cell_type": "code", "execution_count": 128, "metadata": {}, "outputs": [], "source": [ "# Load all image preprocess it and Extract feature from image\n", "image_features = {}\n", "base_dir = 'Images/'\n", "\n", "for image in tqdm(df['image']):\n", " img_path = base_dir + image\n", " img = load_img(img_path,\n", " target_size=target_shape)\n", " img = img_to_array(img)\n", " img = tf.expand_dims(img,axis=0)\n", " img = preprocess_input(img)\n", "\n", " feature = vgg_model.predict(img,verbose=False)\n", " image_features[image] = feature\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 129, "metadata": {}, "outputs": [], "source": [ "# with open('img_features.pkl','wb') as f:\n", "# pkl.dump(image_features,f)" ] }, { "cell_type": "code", "execution_count": 130, "metadata": {}, "outputs": [], "source": [ "# pickling the image feature dict\n", "with open('img_features.pkl','rb') as f:\n", " img_features = pkl.load(f)" ] }, { "cell_type": "code", "execution_count": 131, "metadata": {}, "outputs": [], "source": [ "# Custome Data genarator for seq-to-seq Modeling\n", "class data_genarator(Sequence):\n", "\n", " def __init__(self,data,img_features,tokenizer,batch_size=32):\n", " self.data = data\n", " self.img_features = img_features\n", " self.tokenizer = tokenizer\n", " self.batch_size = batch_size\n", "\n", " def __len__(self):\n", " # Returns the number of batches\n", " return len(self.data) // self.batch_size\n", " \n", " def __getitem__(self,index):\n", " # Return each batch of data\n", " batch_data = self.data.iloc[index * self.batch_size : (index+1) * self.batch_size, :]\n", " X1, X2, y = self.__get_data(batch_data)\n", " return [X1, X2], y\n", " \n", " def __get_data(self,batch_data):\n", " # Genarate Sequntial data \n", " X1 , X2, y = list(), list(), list()\n", "\n", " for image_id in batch_data['image']:\n", " img_vector = self.img_features[image_id][0]\n", " captions = batch_data[batch_data['image']==image_id]['caption'].values[0]\n", " \n", " for caption in captions:\n", " seq = self.tokenizer.texts_to_sequences([caption])[0]\n", " for i in range(1,len(seq)):\n", "\n", " input_seq = seq[:i]\n", " output_seq = seq[i]\n", " input_seq = pad_sequences([input_seq],\n", " maxlen=30,\n", " padding='post')[0]\n", " \n", " output_seq = to_categorical([output_seq],\n", " num_classes=vocab_size)[0]\n", "\n", " X1.append(img_vector)\n", " X2.append(input_seq)\n", " y.append(output_seq)\n", " \n", " X1, X2, y = np.array(X1),np.array(X2),np.array(y)\n", " return X1, X2, y\n", "\n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", "\n", "\n", " " ] }, { "cell_type": "code", "execution_count": 132, "metadata": {}, "outputs": [], "source": [ "# class CustomDataGenerator(Sequence):\n", " \n", "# def __init__(self, df, X_col, y_col, batch_size, tokenizer, \n", "# vocab_size, max_length, features,shuffle=True):\n", " \n", "# self.df = df.copy()\n", "# self.X_col = X_col\n", "# self.y_col = y_col\n", "# self.batch_size = batch_size\n", "# self.tokenizer = tokenizer\n", "# self.vocab_size = vocab_size\n", "# self.max_length = max_length\n", "# self.features = features\n", "# self.shuffle = shuffle\n", "# self.n = len(self.df)\n", " \n", "# def on_epoch_end(self):\n", "# if self.shuffle:\n", "# self.df = self.df.sample(frac=1).reset_index(drop=True)\n", " \n", "# def __len__(self):\n", "# return self.n // self.batch_size\n", " \n", "# def __getitem__(self,index):\n", " \n", "# batch = self.df.iloc[index * self.batch_size:(index + 1) * self.batch_size,:]\n", "# X1, X2, y = self.__get_data(batch) \n", "# return (X1, X2), y\n", " \n", "# def __get_data(self,batch): \n", " \n", "# X1, X2, y = list(), list(), list()\n", "# images = batch[self.X_col].tolist()\n", " \n", "# for image in images:\n", "# feature = self.features[image][0]\n", "# captions = batch.loc[batch[self.X_col]==image, self.y_col].values[0]\n", "\n", "# for caption in captions:\n", "# seq = self.tokenizer.texts_to_sequences([caption])[0]\n", "\n", "# for i in range(1,len(seq)):\n", "# in_seq, out_seq = seq[:i], seq[i]\n", "# in_seq = pad_sequences([in_seq], maxlen=self.max_length,padding='post')[0]\n", "# out_seq = to_categorical([out_seq], num_classes=self.vocab_size)[0]\n", "# X1.append(feature)\n", "# X2.append(in_seq)\n", "# y.append(out_seq)\n", " \n", "# X1, X2, y = np.array(X1), np.array(X2), np.array(y)\n", " \n", "# return X1, X2, y" ] }, { "cell_type": "code", "execution_count": 135, "metadata": {}, "outputs": [], "source": [ "# Model Building\n", "\n", "input_1 = layers.Input(shape=(1000,))\n", "fc1 = layers.Dense(512,activation='relu')(input_1)\n", "\n", "input_2 = layers.Input(shape=(30,))\n", "embedding = layers.Embedding(vocab_size,512,mask_zero=True)(input_2)\n", "\n", "add = layers.add([fc1,embedding])\n", "\n", "lstm = layers.LSTM(128)(add)\n", "fc2 = layers.Dense(256, activation='relu')(lstm)\n", "softmax = layers.Dense(vocab_size,activation='softmax')(fc2)\n", "\n", "\n", "model = Model(inputs=[input_1,input_2],\n", " outputs=softmax )\n", "\n", "model.compile(loss='categorical_crossentropy', optimizer='rmsprop')" ] }, { "cell_type": "code", "execution_count": 136, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: \"model_3\"\n", "__________________________________________________________________________________________________\n", " Layer (type) Output Shape Param # Connected to \n", "==================================================================================================\n", " input_7 (InputLayer) [(None, 1000)] 0 [] \n", " \n", " input_8 (InputLayer) [(None, 30)] 0 [] \n", " \n", " dense_9 (Dense) (None, 512) 512512 ['input_7[0][0]'] \n", " \n", " embedding_3 (Embedding) (None, 30, 512) 4349952 ['input_8[0][0]'] \n", " \n", " add_3 (Add) (None, 30, 512) 0 ['dense_9[0][0]', \n", " 'embedding_3[0][0]'] \n", " \n", " lstm_3 (LSTM) (None, 128) 328192 ['add_3[0][0]'] \n", " \n", " dense_10 (Dense) (None, 256) 33024 ['lstm_3[0][0]'] \n", " \n", " dense_11 (Dense) (None, 8496) 2183472 ['dense_10[0][0]'] \n", " \n", "==================================================================================================\n", "Total params: 7,407,152\n", "Trainable params: 7,407,152\n", "Non-trainable params: 0\n", "__________________________________________________________________________________________________\n" ] } ], "source": [ "model.summary()" ] }, { "cell_type": "code", "execution_count": 137, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "" ] }, "execution_count": 137, "metadata": {}, "output_type": "execute_result" } ], "source": [ "plot_model(model,show_shapes=True)" ] }, { "cell_type": "code", "execution_count": 138, "metadata": {}, "outputs": [], "source": [ "train_ds = data_genarator(data=train_data,img_features=img_features,tokenizer=tokenizer)\n", "\n", "test_ds = data_genarator(data=test,img_features=img_features,tokenizer=tokenizer)" ] }, { "cell_type": "code", "execution_count": 139, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1998, 30)" ] }, "execution_count": 139, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_ds[0][0][1].shape" ] }, { "cell_type": "code", "execution_count": 140, "metadata": {}, "outputs": [], "source": [ "# train_generator = CustomDataGenerator(df=train_data,\n", "# X_col='image',\n", "# y_col='caption',\n", "# batch_size=32,\n", "# tokenizer=tokenizer,\n", "# vocab_size=vocab_size,\n", "# max_length=30,\n", "# features=img_features)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/20\n", "203/203 [==============================] - 40s 176ms/step - loss: 4.9081 - val_loss: 4.1670\n", "Epoch 2/20\n", "203/203 [==============================] - 34s 166ms/step - loss: 3.9464 - val_loss: 3.7417\n", "Epoch 3/20\n", "203/203 [==============================] - 33s 161ms/step - loss: 3.6003 - val_loss: 3.5350\n", "Epoch 4/20\n", "203/203 [==============================] - 32s 155ms/step - loss: 3.3984 - val_loss: 3.4103\n", "Epoch 5/20\n", "203/203 [==============================] - 32s 158ms/step - loss: 3.2561 - val_loss: 3.3249\n", "Epoch 6/20\n", "203/203 [==============================] - 34s 166ms/step - loss: 3.1435 - val_loss: 3.2565\n", "Epoch 7/20\n", "203/203 [==============================] - 35s 170ms/step - loss: 3.0519 - val_loss: 3.2185\n", "Epoch 8/20\n", "203/203 [==============================] - 35s 172ms/step - loss: 2.9731 - val_loss: 3.1798\n", "Epoch 9/20\n", "203/203 [==============================] - 401s 2s/step - loss: 2.9064 - val_loss: 3.1590\n", "Epoch 10/20\n", "203/203 [==============================] - 18453s 91s/step - loss: 2.8465 - val_loss: 3.1371\n", "Epoch 11/20\n", "203/203 [==============================] - 21s 106ms/step - loss: 2.7938 - val_loss: 3.1293\n", "Epoch 12/20\n", "203/203 [==============================] - 21s 104ms/step - loss: 2.7434 - val_loss: 3.1344\n", "Epoch 13/20\n", "203/203 [==============================] - 22s 106ms/step - loss: 2.6983 - val_loss: 3.1293\n", "Epoch 14/20\n", "203/203 [==============================] - 25s 125ms/step - loss: 2.6549 - val_loss: 3.1152\n", "Epoch 15/20\n", "203/203 [==============================] - 26s 127ms/step - loss: 2.6134 - val_loss: 3.1209\n", "Epoch 16/20\n", "203/203 [==============================] - 25s 124ms/step - loss: 2.5752 - val_loss: 3.1269\n", "Epoch 17/20\n", " 42/203 [=====>........................] - ETA: 17s - loss: 2.4897" ] }, { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[145], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtrain_ds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2\u001b[0m \u001b[43m \u001b[49m\u001b[43mepochs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m20\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalidation_data\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtest_ds\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[1;32mc:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\keras\\utils\\traceback_utils.py:65\u001b[0m, in \u001b[0;36mfilter_traceback..error_handler\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 63\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 64\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m---> 65\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 66\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m 67\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n", "File \u001b[1;32mc:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\keras\\engine\\training.py:1564\u001b[0m, in \u001b[0;36mModel.fit\u001b[1;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[0;32m 1556\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m tf\u001b[38;5;241m.\u001b[39mprofiler\u001b[38;5;241m.\u001b[39mexperimental\u001b[38;5;241m.\u001b[39mTrace(\n\u001b[0;32m 1557\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtrain\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 1558\u001b[0m epoch_num\u001b[38;5;241m=\u001b[39mepoch,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 1561\u001b[0m _r\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m,\n\u001b[0;32m 1562\u001b[0m ):\n\u001b[0;32m 1563\u001b[0m callbacks\u001b[38;5;241m.\u001b[39mon_train_batch_begin(step)\n\u001b[1;32m-> 1564\u001b[0m tmp_logs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain_function\u001b[49m\u001b[43m(\u001b[49m\u001b[43miterator\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1565\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data_handler\u001b[38;5;241m.\u001b[39mshould_sync:\n\u001b[0;32m 1566\u001b[0m context\u001b[38;5;241m.\u001b[39masync_wait()\n", "File \u001b[1;32mc:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\tensorflow\\python\\util\\traceback_utils.py:150\u001b[0m, in \u001b[0;36mfilter_traceback..error_handler\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 148\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 149\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 150\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 151\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m 152\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n", "File \u001b[1;32mc:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\tensorflow\\python\\eager\\def_function.py:915\u001b[0m, in \u001b[0;36mFunction.__call__\u001b[1;34m(self, *args, **kwds)\u001b[0m\n\u001b[0;32m 912\u001b[0m compiler \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mxla\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_jit_compile \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnonXla\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 914\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m OptionalXlaContext(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_jit_compile):\n\u001b[1;32m--> 915\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 917\u001b[0m new_tracing_count \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mexperimental_get_tracing_count()\n\u001b[0;32m 918\u001b[0m without_tracing \u001b[38;5;241m=\u001b[39m (tracing_count \u001b[38;5;241m==\u001b[39m new_tracing_count)\n", "File \u001b[1;32mc:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\tensorflow\\python\\eager\\def_function.py:947\u001b[0m, in \u001b[0;36mFunction._call\u001b[1;34m(self, *args, **kwds)\u001b[0m\n\u001b[0;32m 944\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lock\u001b[38;5;241m.\u001b[39mrelease()\n\u001b[0;32m 945\u001b[0m \u001b[38;5;66;03m# In this case we have created variables on the first call, so we run the\u001b[39;00m\n\u001b[0;32m 946\u001b[0m \u001b[38;5;66;03m# defunned version which is guaranteed to never create variables.\u001b[39;00m\n\u001b[1;32m--> 947\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_stateless_fn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# pylint: disable=not-callable\u001b[39;00m\n\u001b[0;32m 948\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_stateful_fn \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 949\u001b[0m \u001b[38;5;66;03m# Release the lock early so that multiple threads can perform the call\u001b[39;00m\n\u001b[0;32m 950\u001b[0m \u001b[38;5;66;03m# in parallel.\u001b[39;00m\n\u001b[0;32m 951\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lock\u001b[38;5;241m.\u001b[39mrelease()\n", "File \u001b[1;32mc:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\tensorflow\\python\\eager\\function.py:2496\u001b[0m, in \u001b[0;36mFunction.__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 2493\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lock:\n\u001b[0;32m 2494\u001b[0m (graph_function,\n\u001b[0;32m 2495\u001b[0m filtered_flat_args) \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_maybe_define_function(args, kwargs)\n\u001b[1;32m-> 2496\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mgraph_function\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_flat\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 2497\u001b[0m \u001b[43m \u001b[49m\u001b[43mfiltered_flat_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcaptured_inputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgraph_function\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcaptured_inputs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[1;32mc:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\tensorflow\\python\\eager\\function.py:1862\u001b[0m, in \u001b[0;36mConcreteFunction._call_flat\u001b[1;34m(self, args, captured_inputs, cancellation_manager)\u001b[0m\n\u001b[0;32m 1858\u001b[0m possible_gradient_type \u001b[38;5;241m=\u001b[39m gradients_util\u001b[38;5;241m.\u001b[39mPossibleTapeGradientTypes(args)\n\u001b[0;32m 1859\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (possible_gradient_type \u001b[38;5;241m==\u001b[39m gradients_util\u001b[38;5;241m.\u001b[39mPOSSIBLE_GRADIENT_TYPES_NONE\n\u001b[0;32m 1860\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m executing_eagerly):\n\u001b[0;32m 1861\u001b[0m \u001b[38;5;66;03m# No tape is watching; skip to running the function.\u001b[39;00m\n\u001b[1;32m-> 1862\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_build_call_outputs(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_inference_function\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcall\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 1863\u001b[0m \u001b[43m \u001b[49m\u001b[43mctx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcancellation_manager\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcancellation_manager\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[0;32m 1864\u001b[0m forward_backward \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_select_forward_and_backward_functions(\n\u001b[0;32m 1865\u001b[0m args,\n\u001b[0;32m 1866\u001b[0m possible_gradient_type,\n\u001b[0;32m 1867\u001b[0m executing_eagerly)\n\u001b[0;32m 1868\u001b[0m forward_function, args_with_tangents \u001b[38;5;241m=\u001b[39m forward_backward\u001b[38;5;241m.\u001b[39mforward()\n", "File \u001b[1;32mc:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\tensorflow\\python\\eager\\function.py:499\u001b[0m, in \u001b[0;36m_EagerDefinedFunction.call\u001b[1;34m(self, ctx, args, cancellation_manager)\u001b[0m\n\u001b[0;32m 497\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m _InterpolateFunctionError(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m 498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m cancellation_manager \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m--> 499\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mexecute\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 500\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mstr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msignature\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 501\u001b[0m \u001b[43m \u001b[49m\u001b[43mnum_outputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_num_outputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 502\u001b[0m \u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 503\u001b[0m \u001b[43m \u001b[49m\u001b[43mattrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mattrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 504\u001b[0m \u001b[43m \u001b[49m\u001b[43mctx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mctx\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 505\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 506\u001b[0m outputs \u001b[38;5;241m=\u001b[39m execute\u001b[38;5;241m.\u001b[39mexecute_with_cancellation(\n\u001b[0;32m 507\u001b[0m \u001b[38;5;28mstr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msignature\u001b[38;5;241m.\u001b[39mname),\n\u001b[0;32m 508\u001b[0m num_outputs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_num_outputs,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 511\u001b[0m ctx\u001b[38;5;241m=\u001b[39mctx,\n\u001b[0;32m 512\u001b[0m cancellation_manager\u001b[38;5;241m=\u001b[39mcancellation_manager)\n", "File \u001b[1;32mc:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\tensorflow\\python\\eager\\execute.py:54\u001b[0m, in \u001b[0;36mquick_execute\u001b[1;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[0;32m 52\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 53\u001b[0m ctx\u001b[38;5;241m.\u001b[39mensure_initialized()\n\u001b[1;32m---> 54\u001b[0m tensors \u001b[38;5;241m=\u001b[39m \u001b[43mpywrap_tfe\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mTFE_Py_Execute\u001b[49m\u001b[43m(\u001b[49m\u001b[43mctx\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_handle\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdevice_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 55\u001b[0m \u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mattrs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnum_outputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 56\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m core\u001b[38;5;241m.\u001b[39m_NotOkStatusException \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m 57\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", "\u001b[1;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "model.fit(train_ds,\n", " epochs=15,\n", " validation_data=test_ds)" ] }, { "cell_type": "code", "execution_count": 254, "metadata": {}, "outputs": [], "source": [ "# model.save('caption_genaration_model.h5')" ] }, { "cell_type": "code", "execution_count": 177, "metadata": {}, "outputs": [], "source": [ "def index_to_word(word_idx):\n", " return tokenizer.index_word[word_idx]" ] }, { "cell_type": "code", "execution_count": 192, "metadata": {}, "outputs": [], "source": [ "def predict_next(img):\n", "\n", " seq_in = 'startseq'\n", " feature_img = img_features[img]\n", "\n", " for i in range(30):\n", " seq_in_sequence = tokenizer.texts_to_sequences([seq_in])[0]\n", " seq_in_padded = pad_sequences([seq_in_sequence], padding='post',maxlen=30)\n", " y_hat = model.predict([feature_img,seq_in_padded],verbose=False)\n", " word_index = y_hat.argmax(axis=1)\n", " predicted_word = index_to_word(word_index[0])\n", "\n", " if predicted_word == 'endseq':\n", " break\n", " seq_in = seq_in + ' ' + predicted_word \n", "\n", " return seq_in\n" ] }, { "cell_type": "code", "execution_count": 193, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'startseq a man in a black helmet and a black helmet riding a bike'" ] }, "execution_count": 193, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predict_next('990890291_afc72be141.jpg')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Caption Genaration" ] }, { "cell_type": "code", "execution_count": 230, "metadata": {}, "outputs": [], "source": [ "resize_img = Resizing(height=224, width=224)\n", "\n", "def img_preprocces(img): \n", " # preprocces image & Extract feature\n", " img = tf.expand_dims(img,axis=0)\n", " resized_image = resize_img(img)\n", " img = preprocess_input(resized_image)\n", " feature = vgg_model.predict(img,verbose=False)\n", "\n", " return feature" ] }, { "cell_type": "code", "execution_count": 231, "metadata": {}, "outputs": [], "source": [ "\n", "def genarate_caption(img):\n", "\n", " seq_in = 'startseq'\n", " feature_img = img_preprocces(img)\n", " \n", " for i in range(30):\n", " seq_in_sequence = tokenizer.texts_to_sequences([seq_in])[0]\n", " seq_in_padded = pad_sequences([seq_in_sequence], padding='post',maxlen=30)\n", " y_hat = model.predict([feature_img,seq_in_padded],verbose=False)\n", " word_index = y_hat.argmax(axis=1)\n", " predicted_word = index_to_word(word_index[0])\n", " seq_in = seq_in + ' ' + predicted_word \n", " if predicted_word == 'endseq':\n", " break\n", " \n", " return seq_in" ] }, { "cell_type": "code", "execution_count": 232, "metadata": {}, "outputs": [ { "ename": "UnimplementedError", "evalue": "Exception encountered when calling layer \"resizing_2\" \" f\"(type Resizing).\n\n{{function_node __wrapped__Cast_device_/job:localhost/replica:0/task:0/device:CPU:0}} Cast string to float is not supported [Op:Cast]\n\nCall arguments received by layer \"resizing_2\" \" f\"(type Resizing):\n • inputs=tf.Tensor(shape=(1,), dtype=string)", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mUnimplementedError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[232], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mpredict_next\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mimg.jpg\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n", "Cell \u001b[1;32mIn[206], line 5\u001b[0m, in \u001b[0;36mpredict_next\u001b[1;34m(img)\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpredict_next\u001b[39m(img):\n\u001b[0;32m 3\u001b[0m seq_in \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mstartseq\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m----> 5\u001b[0m feature_img \u001b[38;5;241m=\u001b[39m \u001b[43mimg_preprocces\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimg\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 7\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;241m30\u001b[39m):\n\u001b[0;32m 8\u001b[0m seq_in_sequence \u001b[38;5;241m=\u001b[39m tokenizer\u001b[38;5;241m.\u001b[39mtexts_to_sequences([seq_in])[\u001b[38;5;241m0\u001b[39m]\n", "Cell \u001b[1;32mIn[230], line 7\u001b[0m, in \u001b[0;36mimg_preprocces\u001b[1;34m(img)\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mimg_preprocces\u001b[39m(img): \n\u001b[0;32m 5\u001b[0m img \u001b[38;5;241m=\u001b[39m tf\u001b[38;5;241m.\u001b[39mexpand_dims(img,axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m)\n\u001b[1;32m----> 7\u001b[0m resized_image \u001b[38;5;241m=\u001b[39m \u001b[43mresize_img\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimg\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 9\u001b[0m img \u001b[38;5;241m=\u001b[39m preprocess_input(resized_image)\n\u001b[0;32m 12\u001b[0m feature \u001b[38;5;241m=\u001b[39m vgg_model\u001b[38;5;241m.\u001b[39mpredict(img,verbose\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n", "File \u001b[1;32mc:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\keras\\utils\\traceback_utils.py:70\u001b[0m, in \u001b[0;36mfilter_traceback..error_handler\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 67\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n\u001b[0;32m 68\u001b[0m \u001b[38;5;66;03m# To get the full stack trace, call:\u001b[39;00m\n\u001b[0;32m 69\u001b[0m \u001b[38;5;66;03m# `tf.debugging.disable_traceback_filtering()`\u001b[39;00m\n\u001b[1;32m---> 70\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\u001b[38;5;241m.\u001b[39mwith_traceback(filtered_tb) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 71\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 72\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m filtered_tb\n", "File \u001b[1;32mc:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\tensorflow\\python\\framework\\ops.py:7209\u001b[0m, in \u001b[0;36mraise_from_not_ok_status\u001b[1;34m(e, name)\u001b[0m\n\u001b[0;32m 7207\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mraise_from_not_ok_status\u001b[39m(e, name):\n\u001b[0;32m 7208\u001b[0m e\u001b[38;5;241m.\u001b[39mmessage \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m name: \u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m name \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m-> 7209\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m core\u001b[38;5;241m.\u001b[39m_status_to_exception(e) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", "\u001b[1;31mUnimplementedError\u001b[0m: Exception encountered when calling layer \"resizing_2\" \" f\"(type Resizing).\n\n{{function_node __wrapped__Cast_device_/job:localhost/replica:0/task:0/device:CPU:0}} Cast string to float is not supported [Op:Cast]\n\nCall arguments received by layer \"resizing_2\" \" f\"(type Resizing):\n • inputs=tf.Tensor(shape=(1,), dtype=string)" ] } ], "source": [ "predict_next('img.jpg')" ] }, { "cell_type": "code", "execution_count": 234, "metadata": {}, "outputs": [], "source": [ "# Demo modeling\n", "\n", "demo = gr.Interface(\n", " fn=genarate_caption,\n", " inputs=['image'],\n", " outputs=['text']\n", ")" ] }, { "cell_type": "code", "execution_count": 235, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Running on local URL: http://127.0.0.1:7863\n", "\n", "To create a public link, set `share=True` in `launch()`.\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [] }, "execution_count": 235, "metadata": {}, "output_type": "execute_result" }, { "name": "stderr", "output_type": "stream", "text": [ "ERROR: Exception in ASGI application\n", "Traceback (most recent call last):\n", " File \"c:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\uvicorn\\protocols\\http\\httptools_impl.py\", line 401, in run_asgi\n", " result = await app( # type: ignore[func-returns-value]\n", " File \"c:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\uvicorn\\middleware\\proxy_headers.py\", line 60, in __call__\n", " return await self.app(scope, receive, send)\n", " File \"c:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\fastapi\\applications.py\", line 1054, in __call__\n", " await super().__call__(scope, receive, send)\n", " File \"c:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\starlette\\applications.py\", line 113, in __call__\n", " await self.middleware_stack(scope, receive, send)\n", " File \"c:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\starlette\\middleware\\errors.py\", line 187, in __call__\n", " raise exc\n", " File \"c:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\starlette\\middleware\\errors.py\", line 165, in __call__\n", " await self.app(scope, receive, _send)\n", " File \"c:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\gradio\\route_utils.py\", line 761, in __call__\n", " await self.app(scope, receive, send)\n", " File \"c:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\starlette\\middleware\\exceptions.py\", line 62, in __call__\n", " await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)\n", " File \"c:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\starlette\\_exception_handler.py\", line 53, in wrapped_app\n", " raise exc\n", " File \"c:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\starlette\\_exception_handler.py\", line 42, in wrapped_app\n", " await app(scope, receive, sender)\n", " File \"c:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\starlette\\routing.py\", line 715, in __call__\n", " await self.middleware_stack(scope, receive, send)\n", " File \"c:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\starlette\\routing.py\", line 735, in app\n", " await route.handle(scope, receive, send)\n", " File \"c:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\starlette\\routing.py\", line 288, in handle\n", " await self.app(scope, receive, send)\n", " File \"c:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\starlette\\routing.py\", line 76, in app\n", " await wrap_app_handling_exceptions(app, request)(scope, receive, send)\n", " File \"c:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\starlette\\_exception_handler.py\", line 53, in wrapped_app\n", " raise exc\n", " File \"c:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\starlette\\_exception_handler.py\", line 42, in wrapped_app\n", " await app(scope, receive, sender)\n", " File \"c:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\starlette\\routing.py\", line 74, in app\n", " await response(scope, receive, send)\n", " File \"c:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\starlette\\responses.py\", line 348, in __call__\n", " await self._handle_simple(send, send_header_only)\n", " File \"c:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\starlette\\responses.py\", line 377, in _handle_simple\n", " await send({\"type\": \"http.response.body\", \"body\": chunk, \"more_body\": more_body})\n", " File \"c:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\starlette\\_exception_handler.py\", line 39, in sender\n", " await send(message)\n", " File \"c:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\starlette\\_exception_handler.py\", line 39, in sender\n", " await send(message)\n", " File \"c:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\starlette\\middleware\\errors.py\", line 162, in _send\n", " await send(message)\n", " File \"c:\\Users\\VICTUS\\anaconda3\\envs\\GPU\\lib\\site-packages\\uvicorn\\protocols\\http\\httptools_impl.py\", line 536, in send\n", " raise RuntimeError(\"Response content shorter than Content-Length\")\n", "RuntimeError: Response content shorter than Content-Length\n" ] } ], "source": [ "demo.launch()" ] }, { "cell_type": "code", "execution_count": 255, "metadata": {}, "outputs": [], "source": [ "with open('tokenizer.pkl','wb') as f:\n", " pkl.dump(tokenizer,f)" ] }, { "cell_type": "code", "execution_count": 256, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'tf' is not defined", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[1], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mtf\u001b[49m\u001b[38;5;241m.\u001b[39m__version__\n", "\u001b[1;31mNameError\u001b[0m: name 'tf' is not defined" ] } ], "source": [ "tf.__version__" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "GPU", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.20" } }, "nbformat": 4, "nbformat_minor": 2 }