cakiki committed on
Commit
0ffbac7
·
1 Parent(s): 006754f

Add visualization

Browse files
.ipynb_checkpoints/facets-dive-checkpoint.ipynb ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "d5d0ea64",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "data": {
11
+ "text/html": [
12
+ "<style>.container { width:95% !important; }</style>"
13
+ ],
14
+ "text/plain": [
15
+ "<IPython.core.display.HTML object>"
16
+ ]
17
+ },
18
+ "metadata": {},
19
+ "output_type": "display_data"
20
+ }
21
+ ],
22
+ "source": [
23
+ "from IPython.core.display import display, HTML, Image\n",
24
+ "display(HTML(\"<style>.container { width:95% !important; }</style>\"))\n",
25
+ "%config IPCompleter.use_jedi=False"
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "code",
30
+ "execution_count": 8,
31
+ "id": "403c4b8a",
32
+ "metadata": {},
33
+ "outputs": [],
34
+ "source": [
35
+ "import pandas as pd\n",
36
+ "from IPython.display import Markdown, display, HTML, IFrame\n",
37
+ "from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator\n",
38
+ "import base64"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": 3,
44
+ "id": "1c48706a",
45
+ "metadata": {},
46
+ "outputs": [],
47
+ "source": [
48
+ "df = pd.read_csv('./adult.csv')"
49
+ ]
50
+ },
51
+ {
52
+ "cell_type": "code",
53
+ "execution_count": 6,
54
+ "id": "c000f04d",
55
+ "metadata": {},
56
+ "outputs": [
57
+ {
58
+ "data": {
59
+ "text/plain": [
60
+ "Private 22696\n",
61
+ "Self-emp-not-inc 2541\n",
62
+ "Local-gov 2093\n",
63
+ "? 1836\n",
64
+ "State-gov 1298\n",
65
+ "Self-emp-inc 1116\n",
66
+ "Federal-gov 960\n",
67
+ "Without-pay 14\n",
68
+ "Never-worked 7\n",
69
+ "Name: workclass, dtype: int64"
70
+ ]
71
+ },
72
+ "execution_count": 6,
73
+ "metadata": {},
74
+ "output_type": "execute_result"
75
+ }
76
+ ],
77
+ "source": [
78
+ "df['workclass'].value_counts()"
79
+ ]
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "execution_count": 10,
84
+ "id": "85b71af5",
85
+ "metadata": {},
86
+ "outputs": [],
87
+ "source": [
88
+ "sprite_size = 32 if len(df.index)>50000 else 64\n",
89
+ "\n",
90
+ "jsonstr = df.to_json(orient='records') \n",
91
+ "HTML_TEMPLATE = \"\"\"\n",
92
+ " <script src=\"https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js\"></script>\n",
93
+ " <link rel=\"import\" href=\"https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html\">\n",
94
+ " <facets-dive sprite-image-width=\"{sprite_size}\" sprite-image-height=\"{sprite_size}\" id=\"elem\" height=\"1200\"></facets-dive>\n",
95
+ " <script>\n",
96
+ " document.querySelector(\"#elem\").data = {jsonstr};\n",
97
+ " </script>\"\"\"\n",
98
+ "html = HTML_TEMPLATE.format(jsonstr=jsonstr, sprite_size=sprite_size)\n",
99
+ "with open(\"index.html\",'w') as fo:\n",
100
+ " fo.write(html)"
101
+ ]
102
+ },
103
+ {
104
+ "cell_type": "code",
105
+ "execution_count": null,
106
+ "id": "fce8e9f4",
107
+ "metadata": {},
108
+ "outputs": [],
109
+ "source": []
110
+ }
111
+ ],
112
+ "metadata": {
113
+ "kernelspec": {
114
+ "display_name": "Python 3 (ipykernel)",
115
+ "language": "python",
116
+ "name": "python3"
117
+ },
118
+ "language_info": {
119
+ "codemirror_mode": {
120
+ "name": "ipython",
121
+ "version": 3
122
+ },
123
+ "file_extension": ".py",
124
+ "mimetype": "text/x-python",
125
+ "name": "python",
126
+ "nbconvert_exporter": "python",
127
+ "pygments_lexer": "ipython3",
128
+ "version": "3.9.7"
129
+ }
130
+ },
131
+ "nbformat": 4,
132
+ "nbformat_minor": 5
133
+ }
Dockerfile ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ FROM jupyter/base-notebook:latest
2
+ # pip >= 20.3 uses the new dependency resolver by default, so no opt-in flag is needed.
3
+ RUN pip install pandas facets-overview
Makefile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ VERSION := 0.0.1
2
+ NAME := facets-dive
3
+ REPO := cakiki
4
+
5
+ build:
6
+ docker build -f Dockerfile -t ${REPO}/${NAME}:${VERSION} -t ${REPO}/${NAME}:latest .
7
+
8
+ run: build # run the image with the current directory bind-mounted at /home/jovyan/work
9
+ docker run --rm -it -p 8888:8888 --mount type=bind,source=${PWD},target=/home/jovyan/work --name ${NAME} --workdir=/home/jovyan/work ${REPO}/${NAME}:${VERSION}
10
+
11
+ push: build
12
+ docker push ${REPO}/${NAME}:${VERSION} && docker push ${REPO}/${NAME}:latest
13
+
adult.csv ADDED
The diff for this file is too large to render. See raw diff
 
facets-dive.ipynb ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "d5d0ea64",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "data": {
11
+ "text/html": [
12
+ "<style>.container { width:95% !important; }</style>"
13
+ ],
14
+ "text/plain": [
15
+ "<IPython.core.display.HTML object>"
16
+ ]
17
+ },
18
+ "metadata": {},
19
+ "output_type": "display_data"
20
+ }
21
+ ],
22
+ "source": [
23
+ "from IPython.core.display import display, HTML, Image\n",
24
+ "display(HTML(\"<style>.container { width:95% !important; }</style>\"))\n",
25
+ "%config IPCompleter.use_jedi=False"
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "code",
30
+ "execution_count": 8,
31
+ "id": "403c4b8a",
32
+ "metadata": {},
33
+ "outputs": [],
34
+ "source": [
35
+ "import pandas as pd\n",
36
+ "from IPython.display import Markdown, display, HTML, IFrame\n",
37
+ "from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator\n",
38
+ "import base64"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": 3,
44
+ "id": "1c48706a",
45
+ "metadata": {},
46
+ "outputs": [],
47
+ "source": [
48
+ "df = pd.read_csv('./adult.csv')"
49
+ ]
50
+ },
51
+ {
52
+ "cell_type": "code",
53
+ "execution_count": 6,
54
+ "id": "c000f04d",
55
+ "metadata": {},
56
+ "outputs": [
57
+ {
58
+ "data": {
59
+ "text/plain": [
60
+ "Private 22696\n",
61
+ "Self-emp-not-inc 2541\n",
62
+ "Local-gov 2093\n",
63
+ "? 1836\n",
64
+ "State-gov 1298\n",
65
+ "Self-emp-inc 1116\n",
66
+ "Federal-gov 960\n",
67
+ "Without-pay 14\n",
68
+ "Never-worked 7\n",
69
+ "Name: workclass, dtype: int64"
70
+ ]
71
+ },
72
+ "execution_count": 6,
73
+ "metadata": {},
74
+ "output_type": "execute_result"
75
+ }
76
+ ],
77
+ "source": [
78
+ "df['workclass'].value_counts()"
79
+ ]
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "execution_count": 10,
84
+ "id": "85b71af5",
85
+ "metadata": {},
86
+ "outputs": [],
87
+ "source": [
88
+ "sprite_size = 32 if len(df.index)>50000 else 64\n",
89
+ "\n",
90
+ "jsonstr = df.to_json(orient='records') \n",
91
+ "HTML_TEMPLATE = \"\"\"\n",
92
+ " <script src=\"https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js\"></script>\n",
93
+ " <link rel=\"import\" href=\"https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html\">\n",
94
+ " <facets-dive sprite-image-width=\"{sprite_size}\" sprite-image-height=\"{sprite_size}\" id=\"elem\" height=\"1200\"></facets-dive>\n",
95
+ " <script>\n",
96
+ " document.querySelector(\"#elem\").data = {jsonstr};\n",
97
+ " </script>\"\"\"\n",
98
+ "html = HTML_TEMPLATE.format(jsonstr=jsonstr, sprite_size=sprite_size)\n",
99
+ "with open(\"index.html\",'w') as fo:\n",
100
+ " fo.write(html)"
101
+ ]
102
+ },
103
+ {
104
+ "cell_type": "code",
105
+ "execution_count": null,
106
+ "id": "fce8e9f4",
107
+ "metadata": {},
108
+ "outputs": [],
109
+ "source": []
110
+ }
111
+ ],
112
+ "metadata": {
113
+ "kernelspec": {
114
+ "display_name": "Python 3 (ipykernel)",
115
+ "language": "python",
116
+ "name": "python3"
117
+ },
118
+ "language_info": {
119
+ "codemirror_mode": {
120
+ "name": "ipython",
121
+ "version": 3
122
+ },
123
+ "file_extension": ".py",
124
+ "mimetype": "text/x-python",
125
+ "name": "python",
126
+ "nbconvert_exporter": "python",
127
+ "pygments_lexer": "ipython3",
128
+ "version": "3.9.7"
129
+ }
130
+ },
131
+ "nbformat": 4,
132
+ "nbformat_minor": 5
133
+ }
index.html CHANGED
The diff for this file is too large to render. See raw diff