Azhary Arliansyah commited on
Commit
da7d4c3
1 Parent(s): bfe36d3

Created using Colaboratory

Browse files
Files changed (1) hide show
  1. FnV_Experiment.ipynb +453 -0
FnV_Experiment.ipynb ADDED
@@ -0,0 +1,453 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "name": "FnV - Experiment.ipynb",
7
+ "provenance": [],
8
+ "authorship_tag": "ABX9TyP3QrLYzuRpaXcYfFN083H1",
9
+ "include_colab_link": true
10
+ },
11
+ "kernelspec": {
12
+ "name": "python3",
13
+ "display_name": "Python 3"
14
+ },
15
+ "language_info": {
16
+ "name": "python"
17
+ }
18
+ },
19
+ "cells": [
20
+ {
21
+ "cell_type": "markdown",
22
+ "metadata": {
23
+ "id": "view-in-github",
24
+ "colab_type": "text"
25
+ },
26
+ "source": [
27
+ "<a href=\"https://colab.research.google.com/github/patal-dev/april/blob/main/FnV_Experiment.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
28
+ ]
29
+ },
30
+ {
31
+ "cell_type": "code",
32
+ "source": [
33
+ "%matplotlib inline\n",
34
+ "\n",
35
+ "import logging\n",
36
+ "logging.getLogger('googleapiclient.discovery_cache').setLevel(logging.ERROR)\n",
37
+ "\n",
38
+ "# Download wiki.mat from Google Drive into the Colab runtime:\n",
39
+ "%pip install -U -q PyDrive\n",
40
+ "from pydrive.auth import GoogleAuth\n",
41
+ "from pydrive.drive import GoogleDrive\n",
42
+ "from google.colab import auth\n",
43
+ "from oauth2client.client import GoogleCredentials\n",
44
+ "# Authenticate the Colab user and build the PyDrive client from those credentials.\n",
45
+ "auth.authenticate_user()\n",
46
+ "gauth = GoogleAuth()\n",
47
+ "gauth.credentials = GoogleCredentials.get_application_default()\n",
48
+ "drive = GoogleDrive(gauth)\n",
49
+ "link = 'https://drive.google.com/open?id=1XcFFQS1ZoUOPs9vSJcA_o-Z1rvxi1Kod'\n",
50
+ "file_id = link.split('=', 1)[1]  # text after 'id='; named file_id so the builtin id() is not shadowed\n",
51
+ "\n",
52
+ "downloaded = drive.CreateFile({'id': file_id})\n",
53
+ "downloaded.GetContentFile('wiki.mat')"
54
+ ],
55
+ "metadata": {
56
+ "id": "zmziIdpUPjS2"
57
+ },
58
+ "execution_count": 24,
59
+ "outputs": []
60
+ },
61
+ {
62
+ "cell_type": "markdown",
63
+ "source": [
64
+ "## Data\n",
65
+ "\n",
66
+ "\n",
67
+ "\n",
68
+ "* dob: date of birth (Matlab serial date number)\n",
69
+ "* photo_taken: year when the photo was taken\n",
70
+ "* full_path: path to file\n",
71
+ "* gender: 0 for female and 1 for male, NaN if unknown\n",
72
+ "* name: name of the celebrity\n",
73
+ "* face_location: location of the face.\n",
74
+ "* face_score: detector score (the higher the better). -Inf implies that no face was found in the image and the face_location then just returns the entire image\n",
75
+ "* second_face_score: detector score of the face with the second highest score. This is useful to ignore images with more than one face. second_face_score is NaN if no second face was detected.\n",
76
+ "* celeb_names (IMDB only): list of all celebrity names\n",
77
+ "* celeb_id (IMDB only): index of celebrity name\n",
78
+ "\n"
79
+ ],
80
+ "metadata": {
81
+ "id": "Cad-POdXV7kC"
82
+ }
83
+ },
84
+ {
85
+ "cell_type": "code",
86
+ "execution_count": 57,
87
+ "metadata": {
88
+ "colab": {
89
+ "base_uri": "https://localhost:8080/"
90
+ },
91
+ "id": "N8p-PTdI34e4",
92
+ "outputId": "e5290e36-00e0-48e9-ed7d-fb5b9d5dfec0"
93
+ },
94
+ "outputs": [
95
+ {
96
+ "output_type": "stream",
97
+ "name": "stdout",
98
+ "text": [
99
+ "name 62204\n",
100
+ "[['723671.0' '703186.0' '711677.0' ... '720620.0' '723893.0' '713846.0']\n",
101
+ " ['2009.0' '1964.0' '2008.0' ... '2013.0' '2011.0' '2008.0']\n",
102
+ " ['17/10000217_1981-05-05_2009.jpg' '48/10000548_1925-04-04_1964.jpg'\n",
103
+ " '12/100012_1948-07-03_2008.jpg' ... '09/9998109_1972-12-27_2013.jpg'\n",
104
+ " '00/9999400_1981-12-13_2011.jpg' '80/999980_1954-06-11_2008.jpg']\n",
105
+ " ...\n",
106
+ " ['1.0' '1.0' '1.0' ... '1.0' '1.0' '0.0']\n",
107
+ " ['4.3009623883308095' '2.6456394971903463' '4.329328832406529' ...\n",
108
+ " '3.4943031690208564' '-inf' '5.486916546849864']\n",
109
+ " ['nan' '1.9492479052091165' 'nan' ... 'nan' 'nan' 'nan']]\n"
110
+ ]
111
+ }
112
+ ],
113
+ "source": [
114
+ "import scipy.io\n",
115
+ "import numpy as np\n",
116
+ "\n",
117
+ "mat = scipy.io.loadmat('wiki.mat')\n",
118
+ "fields = ('dob', 'photo_taken', 'full_path', 'gender', 'name',\n",
119
+ "          'face_location', 'face_score', 'second_face_score')\n",
120
+ "\n",
121
+ "wiki = mat['wiki'][0][0]\n",
122
+ "n_records = wiki['dob'].size  # record count comes from the file, not a hard-coded 62328\n",
123
+ "rows = []\n",
124
+ "for field in fields:\n",
125
+ "    # face_location is still skipped; use an explicit NaN placeholder, not uninitialised np.empty memory.\n",
126
+ "    cells = np.full(n_records, np.nan) if field == 'face_location' else wiki[field].flatten()\n",
127
+ "    if cells.dtype == object:\n",
128
+ "        # One small array per record; an empty one is a missing value and must keep its row (np.hstack dropped it).\n",
129
+ "        cells = np.array([c.flat[0] if c.size else '' for c in cells])\n",
130
+ "    if cells.dtype.kind in 'iuf':\n",
131
+ "        cells = cells.astype(float)  # render every numeric field the same way, e.g. '2009.0'\n",
132
+ "    rows.append(cells.astype(str))\n",
133
+ "data = np.vstack(rows)  # shape (len(fields), n_records), all strings; built once instead of np.append per field\n",
134
+ "\n",
135
+ "print(data)"
136
+ ]
137
+ },
138
+ {
139
+ "cell_type": "code",
140
+ "source": [
141
+ "import pandas as pd\n",
142
+ "\n",
143
+ "print(data.shape)\n",
144
+ "# Records become rows; each metadata field becomes a named column.\n",
145
+ "df = pd.DataFrame(data.T, columns=list(fields))\n",
146
+ "\n",
147
+ "df"
148
+ ],
149
+ "metadata": {
150
+ "id": "sW4oRDTs4L4p",
151
+ "colab": {
152
+ "base_uri": "https://localhost:8080/",
153
+ "height": 441
154
+ },
155
+ "outputId": "9dfa7921-f09d-4c20-832e-1d0774ae19f6"
156
+ },
157
+ "execution_count": 58,
158
+ "outputs": [
159
+ {
160
+ "output_type": "stream",
161
+ "name": "stdout",
162
+ "text": [
163
+ "(8, 62328)\n"
164
+ ]
165
+ },
166
+ {
167
+ "output_type": "execute_result",
168
+ "data": {
169
+ "text/plain": [
170
+ " dob photo_taken full_path gender \\\n",
171
+ "0 723671.0 2009.0 17/10000217_1981-05-05_2009.jpg 1.0 \n",
172
+ "1 703186.0 1964.0 48/10000548_1925-04-04_1964.jpg 1.0 \n",
173
+ "2 711677.0 2008.0 12/100012_1948-07-03_2008.jpg 1.0 \n",
174
+ "3 705061.0 1961.0 65/10001965_1930-05-23_1961.jpg 1.0 \n",
175
+ "4 720044.0 2012.0 16/10002116_1971-05-31_2012.jpg 0.0 \n",
176
+ "... ... ... ... ... \n",
177
+ "62323 707582.0 1963.0 49/9996949_1937-04-17_1963.jpg 1.0 \n",
178
+ "62324 711338.0 1970.0 32/9997032_1947-07-30_1970.jpg 1.0 \n",
179
+ "62325 720620.0 2013.0 09/9998109_1972-12-27_2013.jpg 1.0 \n",
180
+ "62326 723893.0 2011.0 00/9999400_1981-12-13_2011.jpg 1.0 \n",
181
+ "62327 713846.0 2008.0 80/999980_1954-06-11_2008.jpg 0.0 \n",
182
+ "\n",
183
+ " name face_location face_score \\\n",
184
+ "0 Sami Jauhojärvi 1.0 4.3009623883308095 \n",
185
+ "1 Dettmar Cramer 1.0 2.6456394971903463 \n",
186
+ "2 Marc Okrand 1.0 4.329328832406529 \n",
187
+ "3 Aleksandar Matanović 1.0 -inf \n",
188
+ "4 Diana Damrau 0.0 3.408442415222501 \n",
189
+ "... ... ... ... \n",
190
+ "62323 0.0 1.0 4.029267756985114 \n",
191
+ "62324 0.0 1.0 -inf \n",
192
+ "62325 4.68486041878186e-310 1.0 3.4943031690208564 \n",
193
+ "62326 4.68486041878186e-310 1.0 -inf \n",
194
+ "62327 6.92474272034567e-310 0.0 5.486916546849864 \n",
195
+ "\n",
196
+ " second_face_score \n",
197
+ "0 nan \n",
198
+ "1 1.9492479052091165 \n",
199
+ "2 nan \n",
200
+ "3 nan \n",
201
+ "4 nan \n",
202
+ "... ... \n",
203
+ "62323 nan \n",
204
+ "62324 nan \n",
205
+ "62325 nan \n",
206
+ "62326 nan \n",
207
+ "62327 nan \n",
208
+ "\n",
209
+ "[62328 rows x 8 columns]"
210
+ ],
211
+ "text/html": [
212
+ "\n",
213
+ " <div id=\"df-bba92cb7-f4b3-4205-94cc-5753d33495c5\">\n",
214
+ " <div class=\"colab-df-container\">\n",
215
+ " <div>\n",
216
+ "<style scoped>\n",
217
+ " .dataframe tbody tr th:only-of-type {\n",
218
+ " vertical-align: middle;\n",
219
+ " }\n",
220
+ "\n",
221
+ " .dataframe tbody tr th {\n",
222
+ " vertical-align: top;\n",
223
+ " }\n",
224
+ "\n",
225
+ " .dataframe thead th {\n",
226
+ " text-align: right;\n",
227
+ " }\n",
228
+ "</style>\n",
229
+ "<table border=\"1\" class=\"dataframe\">\n",
230
+ " <thead>\n",
231
+ " <tr style=\"text-align: right;\">\n",
232
+ " <th></th>\n",
233
+ " <th>dob</th>\n",
234
+ " <th>photo_taken</th>\n",
235
+ " <th>full_path</th>\n",
236
+ " <th>gender</th>\n",
237
+ " <th>name</th>\n",
238
+ " <th>face_location</th>\n",
239
+ " <th>face_score</th>\n",
240
+ " <th>second_face_score</th>\n",
241
+ " </tr>\n",
242
+ " </thead>\n",
243
+ " <tbody>\n",
244
+ " <tr>\n",
245
+ " <th>0</th>\n",
246
+ " <td>723671.0</td>\n",
247
+ " <td>2009.0</td>\n",
248
+ " <td>17/10000217_1981-05-05_2009.jpg</td>\n",
249
+ " <td>1.0</td>\n",
250
+ " <td>Sami Jauhojärvi</td>\n",
251
+ " <td>1.0</td>\n",
252
+ " <td>4.3009623883308095</td>\n",
253
+ " <td>nan</td>\n",
254
+ " </tr>\n",
255
+ " <tr>\n",
256
+ " <th>1</th>\n",
257
+ " <td>703186.0</td>\n",
258
+ " <td>1964.0</td>\n",
259
+ " <td>48/10000548_1925-04-04_1964.jpg</td>\n",
260
+ " <td>1.0</td>\n",
261
+ " <td>Dettmar Cramer</td>\n",
262
+ " <td>1.0</td>\n",
263
+ " <td>2.6456394971903463</td>\n",
264
+ " <td>1.9492479052091165</td>\n",
265
+ " </tr>\n",
266
+ " <tr>\n",
267
+ " <th>2</th>\n",
268
+ " <td>711677.0</td>\n",
269
+ " <td>2008.0</td>\n",
270
+ " <td>12/100012_1948-07-03_2008.jpg</td>\n",
271
+ " <td>1.0</td>\n",
272
+ " <td>Marc Okrand</td>\n",
273
+ " <td>1.0</td>\n",
274
+ " <td>4.329328832406529</td>\n",
275
+ " <td>nan</td>\n",
276
+ " </tr>\n",
277
+ " <tr>\n",
278
+ " <th>3</th>\n",
279
+ " <td>705061.0</td>\n",
280
+ " <td>1961.0</td>\n",
281
+ " <td>65/10001965_1930-05-23_1961.jpg</td>\n",
282
+ " <td>1.0</td>\n",
283
+ " <td>Aleksandar Matanović</td>\n",
284
+ " <td>1.0</td>\n",
285
+ " <td>-inf</td>\n",
286
+ " <td>nan</td>\n",
287
+ " </tr>\n",
288
+ " <tr>\n",
289
+ " <th>4</th>\n",
290
+ " <td>720044.0</td>\n",
291
+ " <td>2012.0</td>\n",
292
+ " <td>16/10002116_1971-05-31_2012.jpg</td>\n",
293
+ " <td>0.0</td>\n",
294
+ " <td>Diana Damrau</td>\n",
295
+ " <td>0.0</td>\n",
296
+ " <td>3.408442415222501</td>\n",
297
+ " <td>nan</td>\n",
298
+ " </tr>\n",
299
+ " <tr>\n",
300
+ " <th>...</th>\n",
301
+ " <td>...</td>\n",
302
+ " <td>...</td>\n",
303
+ " <td>...</td>\n",
304
+ " <td>...</td>\n",
305
+ " <td>...</td>\n",
306
+ " <td>...</td>\n",
307
+ " <td>...</td>\n",
308
+ " <td>...</td>\n",
309
+ " </tr>\n",
310
+ " <tr>\n",
311
+ " <th>62323</th>\n",
312
+ " <td>707582.0</td>\n",
313
+ " <td>1963.0</td>\n",
314
+ " <td>49/9996949_1937-04-17_1963.jpg</td>\n",
315
+ " <td>1.0</td>\n",
316
+ " <td>0.0</td>\n",
317
+ " <td>1.0</td>\n",
318
+ " <td>4.029267756985114</td>\n",
319
+ " <td>nan</td>\n",
320
+ " </tr>\n",
321
+ " <tr>\n",
322
+ " <th>62324</th>\n",
323
+ " <td>711338.0</td>\n",
324
+ " <td>1970.0</td>\n",
325
+ " <td>32/9997032_1947-07-30_1970.jpg</td>\n",
326
+ " <td>1.0</td>\n",
327
+ " <td>0.0</td>\n",
328
+ " <td>1.0</td>\n",
329
+ " <td>-inf</td>\n",
330
+ " <td>nan</td>\n",
331
+ " </tr>\n",
332
+ " <tr>\n",
333
+ " <th>62325</th>\n",
334
+ " <td>720620.0</td>\n",
335
+ " <td>2013.0</td>\n",
336
+ " <td>09/9998109_1972-12-27_2013.jpg</td>\n",
337
+ " <td>1.0</td>\n",
338
+ " <td>4.68486041878186e-310</td>\n",
339
+ " <td>1.0</td>\n",
340
+ " <td>3.4943031690208564</td>\n",
341
+ " <td>nan</td>\n",
342
+ " </tr>\n",
343
+ " <tr>\n",
344
+ " <th>62326</th>\n",
345
+ " <td>723893.0</td>\n",
346
+ " <td>2011.0</td>\n",
347
+ " <td>00/9999400_1981-12-13_2011.jpg</td>\n",
348
+ " <td>1.0</td>\n",
349
+ " <td>4.68486041878186e-310</td>\n",
350
+ " <td>1.0</td>\n",
351
+ " <td>-inf</td>\n",
352
+ " <td>nan</td>\n",
353
+ " </tr>\n",
354
+ " <tr>\n",
355
+ " <th>62327</th>\n",
356
+ " <td>713846.0</td>\n",
357
+ " <td>2008.0</td>\n",
358
+ " <td>80/999980_1954-06-11_2008.jpg</td>\n",
359
+ " <td>0.0</td>\n",
360
+ " <td>6.92474272034567e-310</td>\n",
361
+ " <td>0.0</td>\n",
362
+ " <td>5.486916546849864</td>\n",
363
+ " <td>nan</td>\n",
364
+ " </tr>\n",
365
+ " </tbody>\n",
366
+ "</table>\n",
367
+ "<p>62328 rows × 8 columns</p>\n",
368
+ "</div>\n",
369
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-bba92cb7-f4b3-4205-94cc-5753d33495c5')\"\n",
370
+ " title=\"Convert this dataframe to an interactive table.\"\n",
371
+ " style=\"display:none;\">\n",
372
+ " \n",
373
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
374
+ " width=\"24px\">\n",
375
+ " <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
376
+ " <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
377
+ " </svg>\n",
378
+ " </button>\n",
379
+ " \n",
380
+ " <style>\n",
381
+ " .colab-df-container {\n",
382
+ " display:flex;\n",
383
+ " flex-wrap:wrap;\n",
384
+ " gap: 12px;\n",
385
+ " }\n",
386
+ "\n",
387
+ " .colab-df-convert {\n",
388
+ " background-color: #E8F0FE;\n",
389
+ " border: none;\n",
390
+ " border-radius: 50%;\n",
391
+ " cursor: pointer;\n",
392
+ " display: none;\n",
393
+ " fill: #1967D2;\n",
394
+ " height: 32px;\n",
395
+ " padding: 0 0 0 0;\n",
396
+ " width: 32px;\n",
397
+ " }\n",
398
+ "\n",
399
+ " .colab-df-convert:hover {\n",
400
+ " background-color: #E2EBFA;\n",
401
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
402
+ " fill: #174EA6;\n",
403
+ " }\n",
404
+ "\n",
405
+ " [theme=dark] .colab-df-convert {\n",
406
+ " background-color: #3B4455;\n",
407
+ " fill: #D2E3FC;\n",
408
+ " }\n",
409
+ "\n",
410
+ " [theme=dark] .colab-df-convert:hover {\n",
411
+ " background-color: #434B5C;\n",
412
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
413
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
414
+ " fill: #FFFFFF;\n",
415
+ " }\n",
416
+ " </style>\n",
417
+ "\n",
418
+ " <script>\n",
419
+ " const buttonEl =\n",
420
+ " document.querySelector('#df-bba92cb7-f4b3-4205-94cc-5753d33495c5 button.colab-df-convert');\n",
421
+ " buttonEl.style.display =\n",
422
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
423
+ "\n",
424
+ " async function convertToInteractive(key) {\n",
425
+ " const element = document.querySelector('#df-bba92cb7-f4b3-4205-94cc-5753d33495c5');\n",
426
+ " const dataTable =\n",
427
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
428
+ " [key], {});\n",
429
+ " if (!dataTable) return;\n",
430
+ "\n",
431
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
432
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
433
+ " + ' to learn more about interactive tables.';\n",
434
+ " element.innerHTML = '';\n",
435
+ " dataTable['output_type'] = 'display_data';\n",
436
+ " await google.colab.output.renderOutput(dataTable, element);\n",
437
+ " const docLink = document.createElement('div');\n",
438
+ " docLink.innerHTML = docLinkHtml;\n",
439
+ " element.appendChild(docLink);\n",
440
+ " }\n",
441
+ " </script>\n",
442
+ " </div>\n",
443
+ " </div>\n",
444
+ " "
445
+ ]
446
+ },
447
+ "metadata": {},
448
+ "execution_count": 58
449
+ }
450
+ ]
451
+ }
452
+ ]
453
+ }