{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "FnV - Experiment.ipynb", "provenance": [], "authorship_tag": "ABX9TyPphpkx1hjLmdOaZEhz/140", "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "code", "source": [
 "%matplotlib inline\n",
 "\n",
 "import logging\n",
 "from urllib.parse import parse_qs, urlparse\n",
 "\n",
 "# Lower the log level of googleapiclient's discovery cache before the Drive client is built.\n",
 "logging.getLogger('googleapiclient.discovery_cache').setLevel(logging.ERROR)\n",
 "\n",
 "# Colab-only setup: install PyDrive into the kernel's environment, pinned so re-runs are reproducible.\n",
 "%pip install -q PyDrive==1.3.1\n",
 "\n",
 "from google.colab import auth\n",
 "from oauth2client.client import GoogleCredentials\n",
 "from pydrive.auth import GoogleAuth\n",
 "from pydrive.drive import GoogleDrive\n",
 "\n",
 "# Authenticate the Colab user and create the PyDrive client.\n",
 "auth.authenticate_user()\n",
 "gauth = GoogleAuth()\n",
 "gauth.credentials = GoogleCredentials.get_application_default()\n",
 "drive = GoogleDrive(gauth)\n",
 "\n",
 "# Shareable Drive link of the file to fetch.\n",
 "link = 'https://drive.google.com/open?id=1XcFFQS1ZoUOPs9vSJcA_o-Z1rvxi1Kod'\n",
 "# Read the `id` query parameter instead of splitting on '=': this survives links that\n",
 "# carry extra parameters and does not shadow the built-in `id`.\n",
 "file_id = parse_qs(urlparse(link).query)['id'][0]\n",
 "\n",
 "# Download the file (a MATLAB .mat file, not a CSV) into the working directory.\n",
 "downloaded = drive.CreateFile({'id': file_id})\n",
 "downloaded.GetContentFile('wiki.mat')"
 ], "metadata": { "id": "zmziIdpUPjS2" }, "execution_count": 1, "outputs": [] }, { "cell_type": "markdown", "source": [
 "## Data\n",
 "\n",
 "\n",
 "\n",
 "* dob: date of birth (Matlab serial date number)\n",
 "* photo_taken: year when the photo was taken\n",
 "* full_path: path to file\n",
 "* gender: 0 for female and 1 for male, NaN if unknown\n",
 "* name: name of the celebrity\n",
 "* face_location: location of the face.\n",
 "* face_score: detector score (the higher the better). -Inf implies that no face was found in the image and the face_location then just returns the entire image\n",
 "* second_face_score: detector score of the face with the second highest score. This is useful to ignore images with more than one face.
second_face_score is NaN if no second face was detected.\n",
 "* celeb_names (IMDB only): list of all celebrity names\n",
 "* celeb_id (IMDB only): index of celebrity name\n",
 "\n" ], "metadata": { "id": "Cad-POdXV7kC" } }, { "cell_type": "code", "execution_count": 22, "metadata": { "id": "N8p-PTdI34e4" }, "outputs": [], "source": [
 "import scipy.io\n",
 "import numpy as np\n",
 "\n",
 "mat = scipy.io.loadmat('wiki.mat')\n",
 "fields = ('dob', 'photo_taken', 'full_path', 'gender', 'name',\n",
 "          'face_location', 'face_score', 'second_face_score')\n",
 "\n",
 "# Pull the struct record out once; its members are read by field name below,\n",
 "# so the result no longer depends on the position of a name inside `fields`.\n",
 "wiki = mat['wiki'][0][0]\n",
 "\n",
 "# Sample count comes from the data itself instead of a hard-coded 62328.\n",
 "n_samples = wiki[fields[0]].size\n",
 "\n",
 "columns = []\n",
 "for field in fields:\n",
 "    raw = wiki[field].flatten()\n",
 "    if field == 'face_location':\n",
 "        # Not unpacked in this notebook: use an explicit NaN placeholder rather than\n",
 "        # np.empty(), whose contents are whatever happened to be in memory.\n",
 "        column = np.full(n_samples, np.nan)\n",
 "    elif field == 'name':\n",
 "        # Kept as one small array per row; np.hstack would drop empty entries and\n",
 "        # leave the column shorter than the others.\n",
 "        column = raw\n",
 "    else:\n",
 "        column = np.hstack(raw)\n",
 "        if np.issubdtype(column.dtype, np.number):\n",
 "            # Same float values the previous float-backed array produced (e.g. 2009.0).\n",
 "            column = column.astype(float)\n",
 "    if len(column) != n_samples:\n",
 "        raise ValueError(f'field {field!r} has {len(column)} values, expected {n_samples}')\n",
 "    columns.append(column)\n",
 "\n",
 "# One row per field. An object array lets every value keep its own type; growing a\n",
 "# float array with np.append() turned the numeric rows into strings as soon as the\n",
 "# string-valued full_path row was appended.\n",
 "data = np.empty((len(fields), n_samples), dtype=object)\n",
 "for row, column in zip(data, columns):\n",
 "    row[:] = column"
 ] }, { "cell_type": "markdown", "source": [
 "Note: `np.hstack` silently drops empty entries, so a field with missing values comes out shorter than the other fields (illustrated below). This is presumably why `name` is not flattened.\n",
 "\n",
 "```\n",
 "# length = 4\n",
 "# [1, None, 2, None]\n",
 "# [1, 2]\n",
 "```\n",
 "\n" ], "metadata": { "id": "0ykl0bhAPrVA" } }, { "cell_type": "code", "source": [
 "import pandas as pd\n",
 "\n",
 "print(data.shape)\n",
 "# `data` holds one row per field: transpose to one column per field, then let pandas\n",
 "# restore numeric dtypes for the columns that contain only floats.\n",
 "df = pd.DataFrame(data.T, columns=list(fields)).infer_objects()\n",
 "\n",
 "df"
 ], "metadata": { "id": "sW4oRDTs4L4p", "colab": { "base_uri": "https://localhost:8080/", "height": 441 }, "outputId": "cb3b6739-6366-4691-802f-0fc784ebfdf8" }, "execution_count": 23, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "(8, 62328)\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ " dob photo_taken full_path gender \\\n", "0 723671.0 2009.0 17/10000217_1981-05-05_2009.jpg 1.0 \n", "1 703186.0 1964.0 48/10000548_1925-04-04_1964.jpg 1.0 \n", "2 711677.0 2008.0 12/100012_1948-07-03_2008.jpg 1.0 \n", "3 705061.0 1961.0 65/10001965_1930-05-23_1961.jpg 1.0 \n", "4 720044.0 2012.0 16/10002116_1971-05-31_2012.jpg 0.0 \n", "... ... ... ...
... \n", "62323 707582.0 1963.0 49/9996949_1937-04-17_1963.jpg 1.0 \n", "62324 711338.0 1970.0 32/9997032_1947-07-30_1970.jpg 1.0 \n", "62325 720620.0 2013.0 09/9998109_1972-12-27_2013.jpg 1.0 \n", "62326 723893.0 2011.0 00/9999400_1981-12-13_2011.jpg 1.0 \n", "62327 713846.0 2008.0 80/999980_1954-06-11_2008.jpg 0.0 \n", "\n", " name face_location face_score second_face_score \n", "0 [Sami Jauhojärvi] 0.0 4.300962 NaN \n", "1 [Dettmar Cramer] 0.0 2.645639 1.949248 \n", "2 [Marc Okrand] 0.0 4.329329 NaN \n", "3 [Aleksandar Matanović] 0.0 -inf NaN \n", "4 [Diana Damrau] 0.0 3.408442 NaN \n", "... ... ... ... ... \n", "62323 [Guus Haak] 0.0 4.029268 NaN \n", "62324 [Nico Rijnders] 0.0 -inf NaN \n", "62325 [Michael Wiesinger] 0.0 3.494303 NaN \n", "62326 [Johann Grugger] 0.0 -inf NaN \n", "62327 [Greta Van Susteren] 0.0 5.486917 NaN \n", "\n", "[62328 rows x 8 columns]" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dobphoto_takenfull_pathgendernameface_locationface_scoresecond_face_score
0723671.02009.017/10000217_1981-05-05_2009.jpg1.0[Sami Jauhojärvi]0.04.300962NaN
1703186.01964.048/10000548_1925-04-04_1964.jpg1.0[Dettmar Cramer]0.02.6456391.949248
2711677.02008.012/100012_1948-07-03_2008.jpg1.0[Marc Okrand]0.04.329329NaN
3705061.01961.065/10001965_1930-05-23_1961.jpg1.0[Aleksandar Matanović]0.0-infNaN
4720044.02012.016/10002116_1971-05-31_2012.jpg0.0[Diana Damrau]0.03.408442NaN
...........................
62323707582.01963.049/9996949_1937-04-17_1963.jpg1.0[Guus Haak]0.04.029268NaN
62324711338.01970.032/9997032_1947-07-30_1970.jpg1.0[Nico Rijnders]0.0-infNaN
62325720620.02013.009/9998109_1972-12-27_2013.jpg1.0[Michael Wiesinger]0.03.494303NaN
62326723893.02011.000/9999400_1981-12-13_2011.jpg1.0[Johann Grugger]0.0-infNaN
62327713846.02008.080/999980_1954-06-11_2008.jpg0.0[Greta Van Susteren]0.05.486917NaN
\n", "

62328 rows × 8 columns

\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 23 } ] } ] }