File size: 7,533 Bytes
1048854
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "d5d0ea64",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>.container { width:95% !important; }</style>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from IPython.core.display import display, HTML, Image\n",
    "display(HTML(\"<style>.container { width:95% !important; }</style>\"))\n",
    "%config IPCompleter.use_jedi=False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "403c4b8a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from IPython.display import Markdown, display, HTML, IFrame\n",
    "from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator\n",
    "import base64"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1c48706a",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv('./adult.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "b512f166",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 32561 entries, 0 to 32560\n",
      "Data columns (total 15 columns):\n",
      " #   Column          Non-Null Count  Dtype \n",
      "---  ------          --------------  ----- \n",
      " 0   age             32561 non-null  int64 \n",
      " 1   workclass       32561 non-null  object\n",
      " 2   fnlwgt          32561 non-null  int64 \n",
      " 3   education       32561 non-null  object\n",
      " 4   education.num   32561 non-null  int64 \n",
      " 5   marital.status  32561 non-null  object\n",
      " 6   occupation      32561 non-null  object\n",
      " 7   relationship    32561 non-null  object\n",
      " 8   race            32561 non-null  object\n",
      " 9   sex             32561 non-null  object\n",
      " 10  capital.gain    32561 non-null  int64 \n",
      " 11  capital.loss    32561 non-null  int64 \n",
      " 12  hours.per.week  32561 non-null  int64 \n",
      " 13  native.country  32561 non-null  object\n",
      " 14  income          32561 non-null  object\n",
      "dtypes: int64(6), object(9)\n",
      "memory usage: 3.7+ MB\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "fce8e9f4",
   "metadata": {},
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "string indices must be integers",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m/tmp/ipykernel_28/1621212634.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mproto\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mGenericFeatureStatisticsGenerator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mProtoFromDataFrames\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0mprotostr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbase64\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mb64encode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mproto\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSerializeToString\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdecode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"utf-8\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m HTML_TEMPLATE = \"\"\"\n\u001b[1;32m      4\u001b[0m         \u001b[0;34m<\u001b[0m\u001b[0mscript\u001b[0m \u001b[0msrc\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js\"\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;34m<\u001b[0m\u001b[0;34m/\u001b[0m\u001b[0mscript\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m         \u001b[0;34m<\u001b[0m\u001b[0mlink\u001b[0m \u001b[0mrel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"import\"\u001b[0m \u001b[0mhref\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html\"\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/opt/conda/lib/python3.9/site-packages/facets_overview/base_generic_feature_statistics_generator.py\u001b[0m in \u001b[0;36mProtoFromDataFrames\u001b[0;34m(self, dataframes, histogram_categorical_levels_count)\u001b[0m\n\u001b[1;32m     49\u001b[0m     \u001b[0mdatasets\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     50\u001b[0m     \u001b[0;32mfor\u001b[0m \u001b[0mdataframe\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdataframes\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 51\u001b[0;31m       \u001b[0mtable\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdataframe\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'table'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     52\u001b[0m       \u001b[0mtable_entries\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     53\u001b[0m       \u001b[0;32mfor\u001b[0m \u001b[0mcol\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtable\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mTypeError\u001b[0m: string indices must be integers"
     ]
    }
   ],
   "source": [
    "proto = GenericFeatureStatisticsGenerator().ProtoFromDataFrames(df)\n",
    "protostr = base64.b64encode(proto.SerializeToString()).decode(\"utf-8\")\n",
    "HTML_TEMPLATE = \"\"\"\n",
    "        <script src=\"https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js\"></script>\n",
    "        <link rel=\"import\" href=\"https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html\">\n",
    "        <facets-overview id=\"elem\"></facets-overview>\n",
    "        <script>\n",
    "          document.querySelector(\"#elem\").protoInput = \"{protostr}\";\n",
    "        </script>\"\"\"\n",
    "html_str = HTML_TEMPLATE.format(protostr=protostr)\n",
    "with open(\"index.html\",'w') as fo:\n",
    "    fo.write(html_str)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c0a817dc",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}