{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "FnV - Experiment.ipynb", "provenance": [], "authorship_tag": "ABX9TyPphpkx1hjLmdOaZEhz/140", "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [
 { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "" ] },
 { "cell_type": "code", "source": [
  "%matplotlib inline\n",
  "\n",
  "import logging\n",
  "from urllib.parse import parse_qs, urlparse\n",
  "\n",
  "# Only let errors through from the googleapiclient discovery-cache logger.\n",
  "logging.getLogger('googleapiclient.discovery_cache').setLevel(logging.ERROR)\n",
  "\n",
  "# Download wiki.mat from Google Drive into the Colab runtime.\n",
  "%pip install -U -q PyDrive\n",
  "from pydrive.auth import GoogleAuth\n",
  "from pydrive.drive import GoogleDrive\n",
  "from google.colab import auth\n",
  "from oauth2client.client import GoogleCredentials\n",
  "\n",
  "# Authenticate and create the PyDrive client.\n",
  "auth.authenticate_user()\n",
  "gauth = GoogleAuth()\n",
  "gauth.credentials = GoogleCredentials.get_application_default()\n",
  "drive = GoogleDrive(gauth)\n",
  "\n",
  "# Read the Drive file id from the link's `id` query parameter. Splitting the\n",
  "# link on '=' fails as soon as the URL carries a second parameter, and the\n",
  "# old variable name `id` shadowed the Python builtin.\n",
  "link = 'https://drive.google.com/open?id=1XcFFQS1ZoUOPs9vSJcA_o-Z1rvxi1Kod'\n",
  "file_id = parse_qs(urlparse(link).query)['id'][0]\n",
  "\n",
  "downloaded = drive.CreateFile({'id': file_id})\n",
  "downloaded.GetContentFile('wiki.mat')" ], "metadata": { "id": "zmziIdpUPjS2" }, "execution_count": 1, "outputs": [] },
 { "cell_type": "markdown", "source": [
  "## Data\n",
  "\n",
  "\n",
  "\n",
  "* dob: date of birth (Matlab serial date number)\n",
  "* photo_taken: year when the photo was taken\n",
  "* full_path: path to file\n",
  "* gender: 0 for female and 1 for male, NaN if unknown\n",
  "* name: name of the celebrity\n",
  "* face_location: location of the face. \n",
  "* face_score: detector score (the higher the better). Inf implies that no face was found in the image and the face_location then just returns the entire image\n",
  "* second_face_score: detector score of the face with the second highest score. This is useful to ignore images with more than one face. 
second_face_score is NaN if no second face was detected.\n",
  "* celeb_names (IMDB only): list of all celebrity names\n",
  "* celeb_id (IMDB only): index of celebrity name\n",
  "\n" ], "metadata": { "id": "Cad-POdXV7kC" } },
 { "cell_type": "code", "execution_count": 22, "metadata": { "id": "N8p-PTdI34e4" }, "outputs": [], "source": [
  "import scipy.io\n",
  "import numpy as np\n",
  "\n",
  "mat = scipy.io.loadmat('wiki.mat')\n",
  "# Position i in `fields` doubles as the index of that field inside the\n",
  "# loaded `wiki` struct, so the order of this tuple matters.\n",
  "fields = ('dob', 'photo_taken', 'full_path', 'gender', 'name',\n",
  "          'face_location', 'face_score', 'second_face_score')\n",
  "\n",
  "wiki_struct = mat['wiki'][0][0]\n",
  "\n",
  "# Read the record count from the file instead of hard-coding 62328.\n",
  "n_records = wiki_struct[0].size\n",
  "l = n_records  # old name, kept in case later cells still use it\n",
  "\n",
  "# One row per field: data ends up with shape (len(fields), n_records).\n",
  "data = np.empty((0, n_records))\n",
  "for i, field in enumerate(fields):\n",
  "    if field == 'face_location':\n",
  "        # Not loaded here; store a zero-filled placeholder row. np.zeros\n",
  "        # replaces np.empty, whose contents are uninitialised memory.\n",
  "        values = np.zeros(n_records)\n",
  "    elif field == 'name':\n",
  "        # Left as one array per record. Presumably np.hstack would drop\n",
  "        # empty names and shorten the row -- TODO confirm.\n",
  "        values = wiki_struct[i].flatten()\n",
  "    else:\n",
  "        values = np.hstack(wiki_struct[i].flatten())\n",
  "    data = np.append(data, [values], axis=0)\n",
  "\n",
  "# NOTE(review): np.append re-casts the rows already stored whenever a row\n",
  "# of another dtype (string, object) is added, so numeric fields may not\n",
  "# stay numeric -- check dtypes before doing arithmetic on these values.\n",
  "assert data.shape == (len(fields), n_records)" ] },
 { "cell_type": "markdown", "source": [ "\n", "\n", "```\n", "# length = 4\n", "# [1, None, 2, None]\n", "# [1, 2]\n", "```\n", "\n" ], "metadata": { "id": "0ykl0bhAPrVA" } },
 { "cell_type": "code", "source": [
  "import pandas as pd\n",
  "\n",
  "print(data.shape)\n",
  "df = pd.DataFrame(data).transpose()\n",
  "df.columns = fields\n",
  "\n",
  "df" ], "metadata": { "id": "sW4oRDTs4L4p", "colab": { "base_uri": "https://localhost:8080/", "height": 441 }, "outputId": "cb3b6739-6366-4691-802f-0fc784ebfdf8" }, "execution_count": 23, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "(8, 62328)\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ " dob photo_taken full_path gender \\\n", "0 723671.0 2009.0 17/10000217_1981-05-05_2009.jpg 1.0 \n", "1 703186.0 1964.0 48/10000548_1925-04-04_1964.jpg 1.0 \n", "2 711677.0 2008.0 12/100012_1948-07-03_2008.jpg 1.0 \n", "3 705061.0 1961.0 65/10001965_1930-05-23_1961.jpg 1.0 \n", "4 720044.0 2012.0 16/10002116_1971-05-31_2012.jpg 0.0 \n", "... ... ... ... 
... \n", "62323 707582.0 1963.0 49/9996949_1937-04-17_1963.jpg 1.0 \n", "62324 711338.0 1970.0 32/9997032_1947-07-30_1970.jpg 1.0 \n", "62325 720620.0 2013.0 09/9998109_1972-12-27_2013.jpg 1.0 \n", "62326 723893.0 2011.0 00/9999400_1981-12-13_2011.jpg 1.0 \n", "62327 713846.0 2008.0 80/999980_1954-06-11_2008.jpg 0.0 \n", "\n", " name face_location face_score second_face_score \n", "0 [Sami Jauhojärvi] 0.0 4.300962 NaN \n", "1 [Dettmar Cramer] 0.0 2.645639 1.949248 \n", "2 [Marc Okrand] 0.0 4.329329 NaN \n", "3 [Aleksandar Matanović] 0.0 -inf NaN \n", "4 [Diana Damrau] 0.0 3.408442 NaN \n", "... ... ... ... ... \n", "62323 [Guus Haak] 0.0 4.029268 NaN \n", "62324 [Nico Rijnders] 0.0 -inf NaN \n", "62325 [Michael Wiesinger] 0.0 3.494303 NaN \n", "62326 [Johann Grugger] 0.0 -inf NaN \n", "62327 [Greta Van Susteren] 0.0 5.486917 NaN \n", "\n", "[62328 rows x 8 columns]" ], "text/html": [ "\n", "
\n", " | dob | \n", "photo_taken | \n", "full_path | \n", "gender | \n", "name | \n", "face_location | \n", "face_score | \n", "second_face_score | \n", "
---|---|---|---|---|---|---|---|---|
0 | \n", "723671.0 | \n", "2009.0 | \n", "17/10000217_1981-05-05_2009.jpg | \n", "1.0 | \n", "[Sami Jauhojärvi] | \n", "0.0 | \n", "4.300962 | \n", "NaN | \n", "
1 | \n", "703186.0 | \n", "1964.0 | \n", "48/10000548_1925-04-04_1964.jpg | \n", "1.0 | \n", "[Dettmar Cramer] | \n", "0.0 | \n", "2.645639 | \n", "1.949248 | \n", "
2 | \n", "711677.0 | \n", "2008.0 | \n", "12/100012_1948-07-03_2008.jpg | \n", "1.0 | \n", "[Marc Okrand] | \n", "0.0 | \n", "4.329329 | \n", "NaN | \n", "
3 | \n", "705061.0 | \n", "1961.0 | \n", "65/10001965_1930-05-23_1961.jpg | \n", "1.0 | \n", "[Aleksandar Matanović] | \n", "0.0 | \n", "-inf | \n", "NaN | \n", "
4 | \n", "720044.0 | \n", "2012.0 | \n", "16/10002116_1971-05-31_2012.jpg | \n", "0.0 | \n", "[Diana Damrau] | \n", "0.0 | \n", "3.408442 | \n", "NaN | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
62323 | \n", "707582.0 | \n", "1963.0 | \n", "49/9996949_1937-04-17_1963.jpg | \n", "1.0 | \n", "[Guus Haak] | \n", "0.0 | \n", "4.029268 | \n", "NaN | \n", "
62324 | \n", "711338.0 | \n", "1970.0 | \n", "32/9997032_1947-07-30_1970.jpg | \n", "1.0 | \n", "[Nico Rijnders] | \n", "0.0 | \n", "-inf | \n", "NaN | \n", "
62325 | \n", "720620.0 | \n", "2013.0 | \n", "09/9998109_1972-12-27_2013.jpg | \n", "1.0 | \n", "[Michael Wiesinger] | \n", "0.0 | \n", "3.494303 | \n", "NaN | \n", "
62326 | \n", "723893.0 | \n", "2011.0 | \n", "00/9999400_1981-12-13_2011.jpg | \n", "1.0 | \n", "[Johann Grugger] | \n", "0.0 | \n", "-inf | \n", "NaN | \n", "
62327 | \n", "713846.0 | \n", "2008.0 | \n", "80/999980_1954-06-11_2008.jpg | \n", "0.0 | \n", "[Greta Van Susteren] | \n", "0.0 | \n", "5.486917 | \n", "NaN | \n", "
62328 rows × 8 columns
\n", "