{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "FnV - Experiment.ipynb", "provenance": [], "authorship_tag": "ABX9TyP3QrLYzuRpaXcYfFN083H1", "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "code", "source": [ "%matplotlib inline\n", "\n", "import logging\n", "logging.getLogger('googleapiclient.discovery_cache').setLevel(logging.ERROR)\n", "\n", "# Code to read csv file into Colaboratory:\n", "!pip install -U -q PyDrive\n", "from pydrive.auth import GoogleAuth\n", "from pydrive.drive import GoogleDrive\n", "from google.colab import auth\n", "from oauth2client.client import GoogleCredentials\n", "# Authenticate and create the PyDrive client.\n", "auth.authenticate_user()\n", "gauth = GoogleAuth()\n", "gauth.credentials = GoogleCredentials.get_application_default()\n", "drive = GoogleDrive(gauth)\n", "link = 'https://drive.google.com/open?id=1XcFFQS1ZoUOPs9vSJcA_o-Z1rvxi1Kod'\n", "fluff, id = link.split('=')\n", "\n", "downloaded = drive.CreateFile({'id':id}) \n", "downloaded.GetContentFile('wiki.mat')" ], "metadata": { "id": "zmziIdpUPjS2" }, "execution_count": 24, "outputs": [] }, { "cell_type": "markdown", "source": [ "## Data\n", "\n", "\n", "\n", "* dob: date of birth (Matlab serial date number)\n", "*photo_taken: year when the photo was taken\n", "*full_path: path to file\n", "*gender: 0 for female and 1 for male, NaN if unknown\n", "*name: name of the celebrity\n", "*face_location: location of the face. \n", "*face_score: detector score (the higher the better). Inf implies that no face was found in the image and the face_location then just returns the entire image\n", "*second_face_score: detector score of the face with the second highest score. This is useful to ignore images with more than one face. second_face_score is NaN if no second face was detected.\n", "*celeb_names (IMDB only): list of all celebrity names\n", "*celeb_id (IMDB only): index of celebrity name\n", "\n" ], "metadata": { "id": "Cad-POdXV7kC" } }, { "cell_type": "code", "execution_count": 57, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "N8p-PTdI34e4", "outputId": "e5290e36-00e0-48e9-ed7d-fb5b9d5dfec0" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "name 62204\n", "[['723671.0' '703186.0' '711677.0' ... '720620.0' '723893.0' '713846.0']\n", " ['2009.0' '1964.0' '2008.0' ... '2013.0' '2011.0' '2008.0']\n", " ['17/10000217_1981-05-05_2009.jpg' '48/10000548_1925-04-04_1964.jpg'\n", " '12/100012_1948-07-03_2008.jpg' ... '09/9998109_1972-12-27_2013.jpg'\n", " '00/9999400_1981-12-13_2011.jpg' '80/999980_1954-06-11_2008.jpg']\n", " ...\n", " ['1.0' '1.0' '1.0' ... '1.0' '1.0' '0.0']\n", " ['4.3009623883308095' '2.6456394971903463' '4.329328832406529' ...\n", " '3.4943031690208564' '-inf' '5.486916546849864']\n", " ['nan' '1.9492479052091165' 'nan' ... 'nan' 'nan' 'nan']]\n" ] } ], "source": [ "import scipy.io\n", "import numpy as np\n", "\n", "mat = scipy.io.loadmat('wiki.mat')\n", "fields = ('dob', 'photo_taken', 'full_path', 'gender', 'name', \n", " 'face_location', 'face_score', 'second_face_score')\n", "\n", "l = 62328\n", "data = np.empty((0, l))\n", "# data = np.array([])\n", "for i, field in enumerate(fields):\n", " if field == 'face_location':\n", " data = np.append(data, [np.empty(l)], axis=0)\n", " continue\n", " values = np.hstack(mat['wiki'][0][0][i].flatten())\n", " if len(values) < l:\n", " print(field, len(values))\n", " remainder = np.empty(l - len(values))\n", " values = np.concatenate((values, remainder))\n", " data = np.append(data, [values], axis=0)\n", "\n", "print(data)" ] }, { "cell_type": "code", "source": [ "import pandas as pd\n", "\n", "print(data.shape)\n", "df = pd.DataFrame(data).transpose()\n", "df.columns = fields\n", "\n", "df" ], "metadata": { "id": "sW4oRDTs4L4p", "colab": { "base_uri": "https://localhost:8080/", "height": 441 }, "outputId": "9dfa7921-f09d-4c20-832e-1d0774ae19f6" }, "execution_count": 58, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "(8, 62328)\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ " dob photo_taken full_path gender \\\n", "0 723671.0 2009.0 17/10000217_1981-05-05_2009.jpg 1.0 \n", "1 703186.0 1964.0 48/10000548_1925-04-04_1964.jpg 1.0 \n", "2 711677.0 2008.0 12/100012_1948-07-03_2008.jpg 1.0 \n", "3 705061.0 1961.0 65/10001965_1930-05-23_1961.jpg 1.0 \n", "4 720044.0 2012.0 16/10002116_1971-05-31_2012.jpg 0.0 \n", "... ... ... ... ... \n", "62323 707582.0 1963.0 49/9996949_1937-04-17_1963.jpg 1.0 \n", "62324 711338.0 1970.0 32/9997032_1947-07-30_1970.jpg 1.0 \n", "62325 720620.0 2013.0 09/9998109_1972-12-27_2013.jpg 1.0 \n", "62326 723893.0 2011.0 00/9999400_1981-12-13_2011.jpg 1.0 \n", "62327 713846.0 2008.0 80/999980_1954-06-11_2008.jpg 0.0 \n", "\n", " name face_location face_score \\\n", "0 Sami Jauhojärvi 1.0 4.3009623883308095 \n", "1 Dettmar Cramer 1.0 2.6456394971903463 \n", "2 Marc Okrand 1.0 4.329328832406529 \n", "3 Aleksandar Matanović 1.0 -inf \n", "4 Diana Damrau 0.0 3.408442415222501 \n", "... ... ... ... \n", "62323 0.0 1.0 4.029267756985114 \n", "62324 0.0 1.0 -inf \n", "62325 4.68486041878186e-310 1.0 3.4943031690208564 \n", "62326 4.68486041878186e-310 1.0 -inf \n", "62327 6.92474272034567e-310 0.0 5.486916546849864 \n", "\n", " second_face_score \n", "0 nan \n", "1 1.9492479052091165 \n", "2 nan \n", "3 nan \n", "4 nan \n", "... ... \n", "62323 nan \n", "62324 nan \n", "62325 nan \n", "62326 nan \n", "62327 nan \n", "\n", "[62328 rows x 8 columns]" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dobphoto_takenfull_pathgendernameface_locationface_scoresecond_face_score
0723671.02009.017/10000217_1981-05-05_2009.jpg1.0Sami Jauhojärvi1.04.3009623883308095nan
1703186.01964.048/10000548_1925-04-04_1964.jpg1.0Dettmar Cramer1.02.64563949719034631.9492479052091165
2711677.02008.012/100012_1948-07-03_2008.jpg1.0Marc Okrand1.04.329328832406529nan
3705061.01961.065/10001965_1930-05-23_1961.jpg1.0Aleksandar Matanović1.0-infnan
4720044.02012.016/10002116_1971-05-31_2012.jpg0.0Diana Damrau0.03.408442415222501nan
...........................
62323707582.01963.049/9996949_1937-04-17_1963.jpg1.00.01.04.029267756985114nan
62324711338.01970.032/9997032_1947-07-30_1970.jpg1.00.01.0-infnan
62325720620.02013.009/9998109_1972-12-27_2013.jpg1.04.68486041878186e-3101.03.4943031690208564nan
62326723893.02011.000/9999400_1981-12-13_2011.jpg1.04.68486041878186e-3101.0-infnan
62327713846.02008.080/999980_1954-06-11_2008.jpg0.06.92474272034567e-3100.05.486916546849864nan
\n", "

62328 rows × 8 columns

\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 58 } ] } ] }