{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "FnV - Experiment.ipynb",
"provenance": [],
"authorship_tag": "ABX9TyPphpkx1hjLmdOaZEhz/140",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
""
]
},
{
"cell_type": "code",
"source": [
"%matplotlib inline\n",
"\n",
"import logging\n",
"logging.getLogger('googleapiclient.discovery_cache').setLevel(logging.ERROR)\n",
"\n",
"# Code to read csv file into Colaboratory:\n",
"!pip install -U -q PyDrive\n",
"from pydrive.auth import GoogleAuth\n",
"from pydrive.drive import GoogleDrive\n",
"from google.colab import auth\n",
"from oauth2client.client import GoogleCredentials\n",
"# Authenticate and create the PyDrive client.\n",
"auth.authenticate_user()\n",
"gauth = GoogleAuth()\n",
"gauth.credentials = GoogleCredentials.get_application_default()\n",
"drive = GoogleDrive(gauth)\n",
"link = 'https://drive.google.com/open?id=1XcFFQS1ZoUOPs9vSJcA_o-Z1rvxi1Kod'\n",
"fluff, id = link.split('=')\n",
"\n",
"downloaded = drive.CreateFile({'id':id}) \n",
"downloaded.GetContentFile('wiki.mat')"
],
"metadata": {
"id": "zmziIdpUPjS2"
},
"execution_count": 1,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Data\n",
"\n",
"\n",
"\n",
"* dob: date of birth (Matlab serial date number)\n",
"* photo_taken: year when the photo was taken\n",
"* full_path: path to file\n",
"* gender: 0 for female and 1 for male, NaN if unknown\n",
"* name: name of the celebrity\n",
"* face_location: location of the face.\n",
"* face_score: detector score (the higher the better). -Inf means that no face was found in the image; face_location then just covers the entire image.\n",
"* second_face_score: detector score of the face with the second highest score. This is useful for ignoring images with more than one face. second_face_score is NaN if no second face was detected.\n",
"* celeb_names (IMDB only): list of all celebrity names\n",
"* celeb_id (IMDB only): index of celebrity name\n",
"\n"
],
"metadata": {
"id": "Cad-POdXV7kC"
}
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"id": "N8p-PTdI34e4"
},
"outputs": [],
"source": [
"import scipy.io\n",
"import numpy as np\n",
"\n",
"mat = scipy.io.loadmat('wiki.mat')\n",
"fields = ('dob', 'photo_taken', 'full_path', 'gender', 'name', \n",
" 'face_location', 'face_score', 'second_face_score')\n",
"\n",
"\n",
"l = 62328\n",
"data = np.empty((0, l))\n",
"for i, field in enumerate(fields):\n",
" values = np.array([])\n",
" if field == 'face_location':\n",
" data = np.append(data, [np.empty(l)], axis=0)\n",
" continue\n",
" elif field == 'name':\n",
" values = mat['wiki'][0][0][i].flatten()\n",
" else:\n",
" values = np.hstack(mat['wiki'][0][0][i].flatten())\n",
" data = np.append(data, [values], axis=0)"
]
},
{
"cell_type": "markdown",
"source": [
"\n",
"\n",
"```\n",
"# length = 4\n",
"# [1, None, 2, None]\n",
"# [1, 2]\n",
"```\n",
"\n"
],
"metadata": {
"id": "0ykl0bhAPrVA"
}
},
{
"cell_type": "code",
"source": [
"import pandas as pd\n",
"\n",
"print(data.shape)\n",
"df = pd.DataFrame(data).transpose()\n",
"df.columns = fields\n",
"\n",
"df"
],
"metadata": {
"id": "sW4oRDTs4L4p",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 441
},
"outputId": "cb3b6739-6366-4691-802f-0fc784ebfdf8"
},
"execution_count": 23,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"(8, 62328)\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
" dob photo_taken full_path gender \\\n",
"0 723671.0 2009.0 17/10000217_1981-05-05_2009.jpg 1.0 \n",
"1 703186.0 1964.0 48/10000548_1925-04-04_1964.jpg 1.0 \n",
"2 711677.0 2008.0 12/100012_1948-07-03_2008.jpg 1.0 \n",
"3 705061.0 1961.0 65/10001965_1930-05-23_1961.jpg 1.0 \n",
"4 720044.0 2012.0 16/10002116_1971-05-31_2012.jpg 0.0 \n",
"... ... ... ... ... \n",
"62323 707582.0 1963.0 49/9996949_1937-04-17_1963.jpg 1.0 \n",
"62324 711338.0 1970.0 32/9997032_1947-07-30_1970.jpg 1.0 \n",
"62325 720620.0 2013.0 09/9998109_1972-12-27_2013.jpg 1.0 \n",
"62326 723893.0 2011.0 00/9999400_1981-12-13_2011.jpg 1.0 \n",
"62327 713846.0 2008.0 80/999980_1954-06-11_2008.jpg 0.0 \n",
"\n",
" name face_location face_score second_face_score \n",
"0 [Sami Jauhojärvi] 0.0 4.300962 NaN \n",
"1 [Dettmar Cramer] 0.0 2.645639 1.949248 \n",
"2 [Marc Okrand] 0.0 4.329329 NaN \n",
"3 [Aleksandar Matanović] 0.0 -inf NaN \n",
"4 [Diana Damrau] 0.0 3.408442 NaN \n",
"... ... ... ... ... \n",
"62323 [Guus Haak] 0.0 4.029268 NaN \n",
"62324 [Nico Rijnders] 0.0 -inf NaN \n",
"62325 [Michael Wiesinger] 0.0 3.494303 NaN \n",
"62326 [Johann Grugger] 0.0 -inf NaN \n",
"62327 [Greta Van Susteren] 0.0 5.486917 NaN \n",
"\n",
"[62328 rows x 8 columns]"
],
"text/html": [
"\n",
"
\n", " | dob | \n", "photo_taken | \n", "full_path | \n", "gender | \n", "name | \n", "face_location | \n", "face_score | \n", "second_face_score | \n", "
---|---|---|---|---|---|---|---|---|
0 | \n", "723671.0 | \n", "2009.0 | \n", "17/10000217_1981-05-05_2009.jpg | \n", "1.0 | \n", "[Sami Jauhojärvi] | \n", "0.0 | \n", "4.300962 | \n", "NaN | \n", "
1 | \n", "703186.0 | \n", "1964.0 | \n", "48/10000548_1925-04-04_1964.jpg | \n", "1.0 | \n", "[Dettmar Cramer] | \n", "0.0 | \n", "2.645639 | \n", "1.949248 | \n", "
2 | \n", "711677.0 | \n", "2008.0 | \n", "12/100012_1948-07-03_2008.jpg | \n", "1.0 | \n", "[Marc Okrand] | \n", "0.0 | \n", "4.329329 | \n", "NaN | \n", "
3 | \n", "705061.0 | \n", "1961.0 | \n", "65/10001965_1930-05-23_1961.jpg | \n", "1.0 | \n", "[Aleksandar Matanović] | \n", "0.0 | \n", "-inf | \n", "NaN | \n", "
4 | \n", "720044.0 | \n", "2012.0 | \n", "16/10002116_1971-05-31_2012.jpg | \n", "0.0 | \n", "[Diana Damrau] | \n", "0.0 | \n", "3.408442 | \n", "NaN | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
62323 | \n", "707582.0 | \n", "1963.0 | \n", "49/9996949_1937-04-17_1963.jpg | \n", "1.0 | \n", "[Guus Haak] | \n", "0.0 | \n", "4.029268 | \n", "NaN | \n", "
62324 | \n", "711338.0 | \n", "1970.0 | \n", "32/9997032_1947-07-30_1970.jpg | \n", "1.0 | \n", "[Nico Rijnders] | \n", "0.0 | \n", "-inf | \n", "NaN | \n", "
62325 | \n", "720620.0 | \n", "2013.0 | \n", "09/9998109_1972-12-27_2013.jpg | \n", "1.0 | \n", "[Michael Wiesinger] | \n", "0.0 | \n", "3.494303 | \n", "NaN | \n", "
62326 | \n", "723893.0 | \n", "2011.0 | \n", "00/9999400_1981-12-13_2011.jpg | \n", "1.0 | \n", "[Johann Grugger] | \n", "0.0 | \n", "-inf | \n", "NaN | \n", "
62327 | \n", "713846.0 | \n", "2008.0 | \n", "80/999980_1954-06-11_2008.jpg | \n", "0.0 | \n", "[Greta Van Susteren] | \n", "0.0 | \n", "5.486917 | \n", "NaN | \n", "
62328 rows × 8 columns
\n", "