{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "FnV - Experiment.ipynb",
"provenance": [],
"authorship_tag": "ABX9TyP3QrLYzuRpaXcYfFN083H1",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
""
]
},
{
"cell_type": "code",
"source": [
"%matplotlib inline\n",
"\n",
"# Mute the discovery-cache logger before any Google API client code runs.\n",
"import logging\n",
"logging.getLogger('googleapiclient.discovery_cache').setLevel(logging.ERROR)\n",
"\n",
"# PyDrive is used to pull the dataset file from Google Drive into Colab.\n",
"!pip install -U -q PyDrive\n",
"from google.colab import auth\n",
"from oauth2client.client import GoogleCredentials\n",
"from pydrive.auth import GoogleAuth\n",
"from pydrive.drive import GoogleDrive\n",
"\n",
"# Sign in through Colab, then hand the default application credentials to PyDrive.\n",
"auth.authenticate_user()\n",
"gauth = GoogleAuth()\n",
"gauth.credentials = GoogleCredentials.get_application_default()\n",
"drive = GoogleDrive(gauth)\n",
"\n",
"# Shareable link to the file; the Drive file id is the part after the '='.\n",
"# NOTE(review): `id` shadows the Python builtin; kept because later cells may rely on the name.\n",
"link = 'https://drive.google.com/open?id=1XcFFQS1ZoUOPs9vSJcA_o-Z1rvxi1Kod'\n",
"fluff, id = link.split('=')\n",
"\n",
"# Fetch the Drive file and store it in the working directory as wiki.mat.\n",
"downloaded = drive.CreateFile({'id': id})\n",
"downloaded.GetContentFile('wiki.mat')"
],
"metadata": {
"id": "zmziIdpUPjS2"
},
"execution_count": 24,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Data\n",
"\n",
"\n",
"\n",
"* dob: date of birth (Matlab serial date number)\n",
"* photo_taken: year when the photo was taken\n",
"* full_path: path to file\n",
"* gender: 0 for female and 1 for male, NaN if unknown\n",
"* name: name of the celebrity\n",
"* face_location: location of the face.\n",
"* face_score: detector score (the higher the better). -Inf implies that no face was found in the image; face_location then just returns the entire image\n",
"* second_face_score: detector score of the face with the second highest score. This is useful to ignore images with more than one face. second_face_score is NaN if no second face was detected.\n",
"* celeb_names (IMDB only): list of all celebrity names\n",
"* celeb_id (IMDB only): index of celebrity name\n",
"\n"
],
"metadata": {
"id": "Cad-POdXV7kC"
}
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "N8p-PTdI34e4",
"outputId": "e5290e36-00e0-48e9-ed7d-fb5b9d5dfec0"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"name 62204\n",
"[['723671.0' '703186.0' '711677.0' ... '720620.0' '723893.0' '713846.0']\n",
" ['2009.0' '1964.0' '2008.0' ... '2013.0' '2011.0' '2008.0']\n",
" ['17/10000217_1981-05-05_2009.jpg' '48/10000548_1925-04-04_1964.jpg'\n",
" '12/100012_1948-07-03_2008.jpg' ... '09/9998109_1972-12-27_2013.jpg'\n",
" '00/9999400_1981-12-13_2011.jpg' '80/999980_1954-06-11_2008.jpg']\n",
" ...\n",
" ['1.0' '1.0' '1.0' ... '1.0' '1.0' '0.0']\n",
" ['4.3009623883308095' '2.6456394971903463' '4.329328832406529' ...\n",
" '3.4943031690208564' '-inf' '5.486916546849864']\n",
" ['nan' '1.9492479052091165' 'nan' ... 'nan' 'nan' 'nan']]\n"
]
}
],
"source": [
"import scipy.io\n",
"import numpy as np\n",
"\n",
"mat = scipy.io.loadmat('wiki.mat')\n",
"fields = ('dob', 'photo_taken', 'full_path', 'gender', 'name',\n",
"          'face_location', 'face_score', 'second_face_score')\n",
"\n",
"# loadmat wraps the MATLAB struct in nested arrays; unwrap it once.\n",
"wiki = mat['wiki'][0][0]\n",
"# Number of records, read from the first column ('dob') instead of a hard-coded 62328.\n",
"l = wiki[0].size\n",
"\n",
"\n",
"def _cell_to_str(cell):\n",
"    \"\"\"Return the first element of a loadmat cell as str, or '' when the cell is empty.\"\"\"\n",
"    flat = np.ravel(cell)\n",
"    return str(flat[0]) if flat.size else ''\n",
"\n",
"\n",
"# Build a (len(fields), l) string array: one row per field, one column per record.\n",
"rows = []\n",
"for i, field in enumerate(fields):\n",
"    if field == 'face_location':\n",
"        # Bounding boxes are not loaded. Use an explicit NaN placeholder:\n",
"        # np.empty would expose uninitialised memory as if it were data.\n",
"        rows.append(np.full(l, np.nan).astype(str))\n",
"        continue\n",
"    column = wiki[i].flatten()\n",
"    if column.dtype == object:\n",
"        # Per-record cells (paths, names). Convert cell by cell so an empty cell\n",
"        # stays in its own slot; np.hstack would drop it and shift every\n",
"        # following value onto the wrong record.\n",
"        values = np.array([_cell_to_str(cell) for cell in column], dtype=str)\n",
"        n_empty = int(np.count_nonzero(values == ''))\n",
"        if n_empty:\n",
"            print(field, 'empty entries:', n_empty)\n",
"    else:\n",
"        # Go through float64 first so integers render as '2009.0', matching\n",
"        # the string form this array has always had.\n",
"        values = column.astype(np.float64).astype(str)\n",
"    if len(values) != l:\n",
"        raise ValueError(f'{field}: expected {l} values, got {len(values)}')\n",
"    rows.append(values)\n",
"\n",
"# Stack once at the end; np.append inside the loop re-copied the whole array each time.\n",
"data = np.vstack(rows)\n",
"\n",
"print(data)"
]
},
{
"cell_type": "code",
"source": [
"import pandas as pd\n",
"\n",
"# Show the (n_fields, n_records) layout before reshaping.\n",
"print(data.shape)\n",
"\n",
"# One row per record, one named column per field.\n",
"df = pd.DataFrame(data.T, columns=list(fields))\n",
"\n",
"df"
],
"metadata": {
"id": "sW4oRDTs4L4p",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 441
},
"outputId": "9dfa7921-f09d-4c20-832e-1d0774ae19f6"
},
"execution_count": 58,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"(8, 62328)\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
" dob photo_taken full_path gender \\\n",
"0 723671.0 2009.0 17/10000217_1981-05-05_2009.jpg 1.0 \n",
"1 703186.0 1964.0 48/10000548_1925-04-04_1964.jpg 1.0 \n",
"2 711677.0 2008.0 12/100012_1948-07-03_2008.jpg 1.0 \n",
"3 705061.0 1961.0 65/10001965_1930-05-23_1961.jpg 1.0 \n",
"4 720044.0 2012.0 16/10002116_1971-05-31_2012.jpg 0.0 \n",
"... ... ... ... ... \n",
"62323 707582.0 1963.0 49/9996949_1937-04-17_1963.jpg 1.0 \n",
"62324 711338.0 1970.0 32/9997032_1947-07-30_1970.jpg 1.0 \n",
"62325 720620.0 2013.0 09/9998109_1972-12-27_2013.jpg 1.0 \n",
"62326 723893.0 2011.0 00/9999400_1981-12-13_2011.jpg 1.0 \n",
"62327 713846.0 2008.0 80/999980_1954-06-11_2008.jpg 0.0 \n",
"\n",
" name face_location face_score \\\n",
"0 Sami Jauhojärvi 1.0 4.3009623883308095 \n",
"1 Dettmar Cramer 1.0 2.6456394971903463 \n",
"2 Marc Okrand 1.0 4.329328832406529 \n",
"3 Aleksandar Matanović 1.0 -inf \n",
"4 Diana Damrau 0.0 3.408442415222501 \n",
"... ... ... ... \n",
"62323 0.0 1.0 4.029267756985114 \n",
"62324 0.0 1.0 -inf \n",
"62325 4.68486041878186e-310 1.0 3.4943031690208564 \n",
"62326 4.68486041878186e-310 1.0 -inf \n",
"62327 6.92474272034567e-310 0.0 5.486916546849864 \n",
"\n",
" second_face_score \n",
"0 nan \n",
"1 1.9492479052091165 \n",
"2 nan \n",
"3 nan \n",
"4 nan \n",
"... ... \n",
"62323 nan \n",
"62324 nan \n",
"62325 nan \n",
"62326 nan \n",
"62327 nan \n",
"\n",
"[62328 rows x 8 columns]"
],
"text/html": [
"\n",
"
\n", " | dob | \n", "photo_taken | \n", "full_path | \n", "gender | \n", "name | \n", "face_location | \n", "face_score | \n", "second_face_score | \n", "
---|---|---|---|---|---|---|---|---|
0 | \n", "723671.0 | \n", "2009.0 | \n", "17/10000217_1981-05-05_2009.jpg | \n", "1.0 | \n", "Sami Jauhojärvi | \n", "1.0 | \n", "4.3009623883308095 | \n", "nan | \n", "
1 | \n", "703186.0 | \n", "1964.0 | \n", "48/10000548_1925-04-04_1964.jpg | \n", "1.0 | \n", "Dettmar Cramer | \n", "1.0 | \n", "2.6456394971903463 | \n", "1.9492479052091165 | \n", "
2 | \n", "711677.0 | \n", "2008.0 | \n", "12/100012_1948-07-03_2008.jpg | \n", "1.0 | \n", "Marc Okrand | \n", "1.0 | \n", "4.329328832406529 | \n", "nan | \n", "
3 | \n", "705061.0 | \n", "1961.0 | \n", "65/10001965_1930-05-23_1961.jpg | \n", "1.0 | \n", "Aleksandar Matanović | \n", "1.0 | \n", "-inf | \n", "nan | \n", "
4 | \n", "720044.0 | \n", "2012.0 | \n", "16/10002116_1971-05-31_2012.jpg | \n", "0.0 | \n", "Diana Damrau | \n", "0.0 | \n", "3.408442415222501 | \n", "nan | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
62323 | \n", "707582.0 | \n", "1963.0 | \n", "49/9996949_1937-04-17_1963.jpg | \n", "1.0 | \n", "0.0 | \n", "1.0 | \n", "4.029267756985114 | \n", "nan | \n", "
62324 | \n", "711338.0 | \n", "1970.0 | \n", "32/9997032_1947-07-30_1970.jpg | \n", "1.0 | \n", "0.0 | \n", "1.0 | \n", "-inf | \n", "nan | \n", "
62325 | \n", "720620.0 | \n", "2013.0 | \n", "09/9998109_1972-12-27_2013.jpg | \n", "1.0 | \n", "4.68486041878186e-310 | \n", "1.0 | \n", "3.4943031690208564 | \n", "nan | \n", "
62326 | \n", "723893.0 | \n", "2011.0 | \n", "00/9999400_1981-12-13_2011.jpg | \n", "1.0 | \n", "4.68486041878186e-310 | \n", "1.0 | \n", "-inf | \n", "nan | \n", "
62327 | \n", "713846.0 | \n", "2008.0 | \n", "80/999980_1954-06-11_2008.jpg | \n", "0.0 | \n", "6.92474272034567e-310 | \n", "0.0 | \n", "5.486916546849864 | \n", "nan | \n", "
62328 rows × 8 columns
\n", "