{ "cells": [ { "cell_type": "code", "execution_count": 236, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import pickle as pkl\n", "import math\n", "import gradio as gr\n", "from tqdm.notebook import tqdm\n", "\n", "import tensorflow as tf\n", "from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input\n", "from tensorflow.keras.models import Model\n", "from tensorflow.keras.utils import load_img, img_to_array\n", "from tensorflow.keras.utils import Sequence\n", "from tensorflow.keras.preprocessing.text import Tokenizer\n", "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", "from tensorflow.keras.utils import to_categorical, plot_model\n", "from tensorflow.keras import layers\n", "from tensorflow.keras.layers import Resizing" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'2.10.0'" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tf.__version__" ] }, { "cell_type": "code", "execution_count": 237, "metadata": {}, "outputs": [], "source": [ "# Read data \n", "df = pd.read_csv('captions.txt')" ] }, { "cell_type": "code", "execution_count": 238, "metadata": {}, "outputs": [], "source": [ "def preprocess_text(text):\n", " text = 'startseq ' + text + ' endseq'\n", " return text" ] }, { "cell_type": "code", "execution_count": 239, "metadata": {}, "outputs": [], "source": [ "# Adding start and end token\n", "df['caption'] = df['caption'].apply(preprocess_text)" ] }, { "cell_type": "code", "execution_count": 240, "metadata": {}, "outputs": [], "source": [ "# Tokenizer\n", "tokenizer = Tokenizer(num_words=5000)\n", "tokenizer.fit_on_texts(df['caption'])" ] }, { "cell_type": "code", "execution_count": 241, "metadata": {}, "outputs": [], "source": [ "# Vocabulary Size\n", "vocab_size = len(tokenizer.word_index) + 1" ] }, { "cell_type": "code", "execution_count": 242, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "39" ] }, "execution_count": 242, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Max len caption\n", "max_len = max(len(i) for i in tokenizer.texts_to_sequences(df['caption']))" ] }, { "cell_type": "code", "execution_count": 243, "metadata": {}, "outputs": [], "source": [ "# Join all different list of caption into one\n", "df = df.groupby('image',as_index=False).aggregate({'caption':list})" ] }, { "cell_type": "code", "execution_count": 246, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | image | \n", "caption | \n", "
---|---|---|
0 | \n", "1000268201_693b08cb0e.jpg | \n", "[startseq A child in a pink dress is climbing ... | \n", "
1 | \n", "1001773457_577c3a7d70.jpg | \n", "[startseq A black dog and a spotted dog are fi... | \n", "
2 | \n", "1002674143_1b742ab4b8.jpg | \n", "[startseq A little girl covered in paint sits ... | \n", "
3 | \n", "1003163366_44323f5815.jpg | \n", "[startseq A man lays on a bench while his dog ... | \n", "
4 | \n", "1007129816_e794419615.jpg | \n", "[startseq A man in an orange hat starring at s... | \n", "