diff --git "a/Pandas_practice_DataEngg.ipynb" "b/Pandas_practice_DataEngg.ipynb" new file mode 100644--- /dev/null +++ "b/Pandas_practice_DataEngg.ipynb" @@ -0,0 +1,3279 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0\n", + "0 1\n", + "1 2\n", + "2 3\n", + "3 4\n" + ] + } + ], + "source": [ + "df=[1,2,3,4]\n", + "print(pd.DataFrame(df))" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 1\n", + "1 2\n", + "2 3\n", + "3 4\n", + "dtype: int64\n" + ] + } + ], + "source": [ + "print(pd.Series(df))" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": {}, + "outputs": [], + "source": [ + "Employ=pd.read_csv(\"employees.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "metadata": {}, + "outputs": [], + "source": [ + "Employ_dub=Employ.head(20)" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
First NameGenderStart DateLast Login TimeSalaryBonus %Senior ManagementTeam
0DouglasMale8/6/199312:42 PM973086.945TrueMarketing
1ThomasMale3/31/19966:53 AM619334.170TrueNaN
2MariaFemale4/23/199311:17 AM13059011.858FalseFinance
3JerryMale3/4/20051:00 PM1387059.340TrueFinance
4LarryMale1/24/19984:47 PM1010041.389TrueClient Services
5DennisMale4/18/19871:35 AM11516310.125FalseLegal
6RubyFemale8/17/19874:20 PM6547610.012TrueProduct
7NaNFemale7/20/201510:43 AM4590611.598NaNFinance
8AngelaFemale11/22/20056:29 AM9557018.523TrueEngineering
9FrancesFemale8/8/20026:51 AM1398527.524TrueBusiness Development
10LouiseFemale8/12/19809:01 AM6324115.132TrueNaN
11JulieFemale10/26/19973:19 PM10250812.637TrueLegal
12BrandonMale12/1/19801:08 AM11280717.492TrueHuman Resources
13GaryMale1/27/200811:40 PM1098315.831FalseSales
14KimberlyFemale1/14/19997:13 AM4142614.543TrueFinance
15LillianFemale6/5/20166:09 AM594141.256FalseProduct
16JeremyMale9/21/20105:56 AM903707.369FalseHuman Resources
17ShawnMale12/7/19867:45 PM1117376.414FalseProduct
18DianaFemale10/23/198110:27 AM13294019.082FalseClient Services
19DonnaFemale7/22/20103:48 AM810141.894FalseProduct
\n", + "
" + ], + "text/plain": [ + " First Name Gender Start Date Last Login Time Salary Bonus % \\\n", + "0 Douglas Male 8/6/1993 12:42 PM 97308 6.945 \n", + "1 Thomas Male 3/31/1996 6:53 AM 61933 4.170 \n", + "2 Maria Female 4/23/1993 11:17 AM 130590 11.858 \n", + "3 Jerry Male 3/4/2005 1:00 PM 138705 9.340 \n", + "4 Larry Male 1/24/1998 4:47 PM 101004 1.389 \n", + "5 Dennis Male 4/18/1987 1:35 AM 115163 10.125 \n", + "6 Ruby Female 8/17/1987 4:20 PM 65476 10.012 \n", + "7 NaN Female 7/20/2015 10:43 AM 45906 11.598 \n", + "8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n", + "9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n", + "10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n", + "11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n", + "12 Brandon Male 12/1/1980 1:08 AM 112807 17.492 \n", + "13 Gary Male 1/27/2008 11:40 PM 109831 5.831 \n", + "14 Kimberly Female 1/14/1999 7:13 AM 41426 14.543 \n", + "15 Lillian Female 6/5/2016 6:09 AM 59414 1.256 \n", + "16 Jeremy Male 9/21/2010 5:56 AM 90370 7.369 \n", + "17 Shawn Male 12/7/1986 7:45 PM 111737 6.414 \n", + "18 Diana Female 10/23/1981 10:27 AM 132940 19.082 \n", + "19 Donna Female 7/22/2010 3:48 AM 81014 1.894 \n", + "\n", + " Senior Management Team \n", + "0 True Marketing \n", + "1 True NaN \n", + "2 False Finance \n", + "3 True Finance \n", + "4 True Client Services \n", + "5 False Legal \n", + "6 True Product \n", + "7 NaN Finance \n", + "8 True Engineering \n", + "9 True Business Development \n", + "10 True NaN \n", + "11 True Legal \n", + "12 True Human Resources \n", + "13 False Sales \n", + "14 True Finance \n", + "15 False Product \n", + "16 False Human Resources \n", + "17 False Product \n", + "18 False Client Services \n", + "19 False Product " + ] + }, + "execution_count": 117, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Employ_dub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": 
"code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 20 entries, 0 to 19\n", + "Data columns (total 8 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 First Name 19 non-null object \n", + " 1 Gender 20 non-null object \n", + " 2 Start Date 20 non-null object \n", + " 3 Last Login Time 20 non-null object \n", + " 4 Salary 20 non-null int64 \n", + " 5 Bonus % 20 non-null float64\n", + " 6 Senior Management 19 non-null object \n", + " 7 Team 18 non-null object \n", + "dtypes: float64(1), int64(1), object(6)\n", + "memory usage: 868.0+ bytes\n" + ] + } + ], + "source": [ + "#total info about the employee\n", + "Employ_dub.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
First NameGenderStart DateLast Login TimeSalaryBonus %Senior ManagementTeam
0FalseFalseFalseFalseFalseFalseFalseFalse
1FalseFalseFalseFalseFalseFalseFalseTrue
2FalseFalseFalseFalseFalseFalseFalseFalse
3FalseFalseFalseFalseFalseFalseFalseFalse
4FalseFalseFalseFalseFalseFalseFalseFalse
5FalseFalseFalseFalseFalseFalseFalseFalse
6FalseFalseFalseFalseFalseFalseFalseFalse
7TrueFalseFalseFalseFalseFalseTrueFalse
8FalseFalseFalseFalseFalseFalseFalseFalse
9FalseFalseFalseFalseFalseFalseFalseFalse
10FalseFalseFalseFalseFalseFalseFalseTrue
11FalseFalseFalseFalseFalseFalseFalseFalse
12FalseFalseFalseFalseFalseFalseFalseFalse
13FalseFalseFalseFalseFalseFalseFalseFalse
14FalseFalseFalseFalseFalseFalseFalseFalse
15FalseFalseFalseFalseFalseFalseFalseFalse
16FalseFalseFalseFalseFalseFalseFalseFalse
17FalseFalseFalseFalseFalseFalseFalseFalse
18FalseFalseFalseFalseFalseFalseFalseFalse
19FalseFalseFalseFalseFalseFalseFalseFalse
\n", + "
" + ], + "text/plain": [ + " First Name Gender Start Date Last Login Time Salary Bonus % \\\n", + "0 False False False False False False \n", + "1 False False False False False False \n", + "2 False False False False False False \n", + "3 False False False False False False \n", + "4 False False False False False False \n", + "5 False False False False False False \n", + "6 False False False False False False \n", + "7 True False False False False False \n", + "8 False False False False False False \n", + "9 False False False False False False \n", + "10 False False False False False False \n", + "11 False False False False False False \n", + "12 False False False False False False \n", + "13 False False False False False False \n", + "14 False False False False False False \n", + "15 False False False False False False \n", + "16 False False False False False False \n", + "17 False False False False False False \n", + "18 False False False False False False \n", + "19 False False False False False False \n", + "\n", + " Senior Management Team \n", + "0 False False \n", + "1 False True \n", + "2 False False \n", + "3 False False \n", + "4 False False \n", + "5 False False \n", + "6 False False \n", + "7 True False \n", + "8 False False \n", + "9 False False \n", + "10 False True \n", + "11 False False \n", + "12 False False \n", + "13 False False \n", + "14 False False \n", + "15 False False \n", + "16 False False \n", + "17 False False \n", + "18 False False \n", + "19 False False " + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Employ_dub.isnull()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "First Name 1\n", + "Gender 0\n", + "Start Date 0\n", + "Last Login Time 0\n", + "Salary 0\n", + "Bonus % 0\n", + "Senior Management 1\n", + "Team 2\n", + "dtype: int64" + ] + }, + "execution_count": 28, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "#checking for the null values in the dataset of employee\n", + "Employ_dub.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "#changing the name of the dataset\n", + "ed=Employ_dub" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(20, 8)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#dimension of the dataset\n", + "ed.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['First Name', 'Gender', 'Start Date', 'Last Login Time', 'Salary',\n", + " 'Bonus %', 'Senior Management', 'Team'],\n", + " dtype='object')" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ed.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "#working on the dictionary for a while:" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "#creating test objects:\n", + "import numpy as np\n", + "ff=pd.DataFrame(np.random.rand(20,5))" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01234
00.0207800.3651900.6738250.8001120.188644
10.6608450.2659130.4450280.8894380.601047
20.6469870.9268230.7228380.4752710.827945
30.8717240.2903530.0995780.1099490.229182
40.7047940.8840620.7513270.5957460.612269
50.3712690.5605120.5102640.2479230.618853
60.1503980.1169990.9348650.3157230.221538
70.5563360.8755140.4715260.5395110.271221
80.4282210.5467660.9212740.5005200.400341
90.1501700.8023780.6081240.3428710.076631
100.0990490.2807480.8659390.2145410.083318
110.0428670.7016390.0514570.6913850.051529
120.5308450.2483950.4337330.0494580.314959
130.1422300.7466340.5362470.0964990.123294
140.1396300.0564640.5956440.7640710.193826
150.7096240.5902620.8162680.1879310.366224
160.9829390.2603580.9188970.5312780.304655
170.3818230.0035940.0525970.9215290.022103
180.2279440.7068320.1372660.1291580.882734
190.2262570.8182130.3260710.2304190.668891
\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4\n", + "0 0.020780 0.365190 0.673825 0.800112 0.188644\n", + "1 0.660845 0.265913 0.445028 0.889438 0.601047\n", + "2 0.646987 0.926823 0.722838 0.475271 0.827945\n", + "3 0.871724 0.290353 0.099578 0.109949 0.229182\n", + "4 0.704794 0.884062 0.751327 0.595746 0.612269\n", + "5 0.371269 0.560512 0.510264 0.247923 0.618853\n", + "6 0.150398 0.116999 0.934865 0.315723 0.221538\n", + "7 0.556336 0.875514 0.471526 0.539511 0.271221\n", + "8 0.428221 0.546766 0.921274 0.500520 0.400341\n", + "9 0.150170 0.802378 0.608124 0.342871 0.076631\n", + "10 0.099049 0.280748 0.865939 0.214541 0.083318\n", + "11 0.042867 0.701639 0.051457 0.691385 0.051529\n", + "12 0.530845 0.248395 0.433733 0.049458 0.314959\n", + "13 0.142230 0.746634 0.536247 0.096499 0.123294\n", + "14 0.139630 0.056464 0.595644 0.764071 0.193826\n", + "15 0.709624 0.590262 0.816268 0.187931 0.366224\n", + "16 0.982939 0.260358 0.918897 0.531278 0.304655\n", + "17 0.381823 0.003594 0.052597 0.921529 0.022103\n", + "18 0.227944 0.706832 0.137266 0.129158 0.882734\n", + "19 0.226257 0.818213 0.326071 0.230419 0.668891" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ff" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 20 entries, 0 to 19\n", + "Data columns (total 5 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 0 20 non-null float64\n", + " 1 1 20 non-null float64\n", + " 2 2 20 non-null float64\n", + " 3 3 20 non-null float64\n", + " 4 4 20 non-null float64\n", + "dtypes: float64(5)\n", + "memory usage: 868.0 bytes\n" + ] + } + ], + "source": [ + "ff.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datestudents
010/9/202010
111/09/202020
212/09/202030
\n", + "
" + ], + "text/plain": [ + " date students\n", + "0 10/9/2020 10\n", + "1 11/09/2020 20\n", + "2 12/09/2020 30" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#data functon:\n", + "date=pd.DataFrame(\n", + "{\n", + "\"date\":['10/9/2020','11/09/2020','12/09/2020'],\n", + "\"students\":[10,20,30]})\n" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "First Name Gender Start Date Last Login Time Salary Bonus % Senior Management Team \n", + "Angela Female 11/22/2005 6:29 AM 95570 18.523 True Engineering 1\n", + "Jerry Male 3/4/2005 1:00 PM 138705 9.340 True Finance 1\n", + "Ruby Female 8/17/1987 4:20 PM 65476 10.012 True Product 1\n", + "Maria Female 4/23/1993 11:17 AM 130590 11.858 False Finance 1\n", + "Lillian Female 6/5/2016 6:09 AM 59414 1.256 False Product 1\n", + "Larry Male 1/24/1998 4:47 PM 101004 1.389 True Client Services 1\n", + "Kimberly Female 1/14/1999 7:13 AM 41426 14.543 True Finance 1\n", + "Julie Female 10/26/1997 3:19 PM 102508 12.637 True Legal 1\n", + "Jeremy Male 9/21/2010 5:56 AM 90370 7.369 False Human Resources 1\n", + "Brandon Male 12/1/1980 1:08 AM 112807 17.492 True Human Resources 1\n", + "Gary Male 1/27/2008 11:40 PM 109831 5.831 False Sales 1\n", + "Frances Female 8/8/2002 6:51 AM 139852 7.524 True Business Development 1\n", + "Douglas Male 8/6/1993 12:42 PM 97308 6.945 True Marketing 1\n", + "Donna Female 7/22/2010 3:48 AM 81014 1.894 False Product 1\n", + "Diana Female 10/23/1981 10:27 AM 132940 19.082 False Client Services 1\n", + "Dennis Male 4/18/1987 1:35 AM 115163 10.125 False Legal 1\n", + "Shawn Male 12/7/1986 7:45 PM 111737 6.414 False Product 1\n", + "dtype: int64" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ed.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + 
"outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GenderSalary
0Male97308
1Male61933
2Female130590
3Male138705
4Male101004
5Male115163
6Female65476
7Female45906
8Female95570
9Female139852
10Female63241
11Female102508
12Male112807
13Male109831
14Female41426
15Female59414
16Male90370
17Male111737
18Female132940
19Female81014
\n", + "
" + ], + "text/plain": [ + " Gender Salary\n", + "0 Male 97308\n", + "1 Male 61933\n", + "2 Female 130590\n", + "3 Male 138705\n", + "4 Male 101004\n", + "5 Male 115163\n", + "6 Female 65476\n", + "7 Female 45906\n", + "8 Female 95570\n", + "9 Female 139852\n", + "10 Female 63241\n", + "11 Female 102508\n", + "12 Male 112807\n", + "13 Male 109831\n", + "14 Female 41426\n", + "15 Female 59414\n", + "16 Male 90370\n", + "17 Male 111737\n", + "18 Female 132940\n", + "19 Female 81014" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ed[['Gender','Salary']]" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
First NameGenderStart DateLast Login TimeSalaryBonus %Senior ManagementTeam
8AngelaFemale11/22/20056:29 AM9557018.523TrueEngineering
9FrancesFemale8/8/20026:51 AM1398527.524TrueBusiness Development
10LouiseFemale8/12/19809:01 AM6324115.132TrueNaN
11JulieFemale10/26/19973:19 PM10250812.637TrueLegal
\n", + "
" + ], + "text/plain": [ + " First Name Gender Start Date Last Login Time Salary Bonus % \\\n", + "8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n", + "9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n", + "10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n", + "11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n", + "\n", + " Senior Management Team \n", + "8 True Engineering \n", + "9 True Business Development \n", + "10 True NaN \n", + "11 True Legal " + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#selection by position:rows data:\n", + "ed.iloc[8:12]" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "First Name Angela\n", + "Gender Female\n", + "Start Date 11/22/2005\n", + "Last Login Time 6:29 AM\n", + "Salary 95570\n", + "Bonus % 18.523\n", + "Senior Management True\n", + "Team Engineering\n", + "Name: 8, dtype: object" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ed.loc[8]" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "First Name 1\n", + "Gender 0\n", + "Start Date 0\n", + "Last Login Time 0\n", + "Salary 0\n", + "Bonus % 0\n", + "Senior Management 1\n", + "Team 2\n", + "dtype: int64" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#data cleaning:\n", + "ed.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#total null values:\n", + "ed.isnull().sum().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "156" + 
] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ed.notnull().sum().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [], + "source": [ + "#for practice on drop we want a copy of the original data (NOTE: plain assignment only aliases ed; ed.copy() would make an independent copy):\n", + "ed2=ed" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
First NameGenderStart DateLast Login TimeSalaryBonus %Senior ManagementTeam
0DouglasMale8/6/199312:42 PM973086.945TrueMarketing
1ThomasMale3/31/19966:53 AM619334.170TrueNaN
2MariaFemale4/23/199311:17 AM13059011.858FalseFinance
3JerryMale3/4/20051:00 PM1387059.340TrueFinance
4LarryMale1/24/19984:47 PM1010041.389TrueClient Services
5DennisMale4/18/19871:35 AM11516310.125FalseLegal
6RubyFemale8/17/19874:20 PM6547610.012TrueProduct
7NaNFemale7/20/201510:43 AM4590611.598NaNFinance
8AngelaFemale11/22/20056:29 AM9557018.523TrueEngineering
9FrancesFemale8/8/20026:51 AM1398527.524TrueBusiness Development
10LouiseFemale8/12/19809:01 AM6324115.132TrueNaN
11JulieFemale10/26/19973:19 PM10250812.637TrueLegal
12BrandonMale12/1/19801:08 AM11280717.492TrueHuman Resources
13GaryMale1/27/200811:40 PM1098315.831FalseSales
14KimberlyFemale1/14/19997:13 AM4142614.543TrueFinance
15LillianFemale6/5/20166:09 AM594141.256FalseProduct
16JeremyMale9/21/20105:56 AM903707.369FalseHuman Resources
17ShawnMale12/7/19867:45 PM1117376.414FalseProduct
18DianaFemale10/23/198110:27 AM13294019.082FalseClient Services
19DonnaFemale7/22/20103:48 AM810141.894FalseProduct
\n", + "
" + ], + "text/plain": [ + " First Name Gender Start Date Last Login Time Salary Bonus % \\\n", + "0 Douglas Male 8/6/1993 12:42 PM 97308 6.945 \n", + "1 Thomas Male 3/31/1996 6:53 AM 61933 4.170 \n", + "2 Maria Female 4/23/1993 11:17 AM 130590 11.858 \n", + "3 Jerry Male 3/4/2005 1:00 PM 138705 9.340 \n", + "4 Larry Male 1/24/1998 4:47 PM 101004 1.389 \n", + "5 Dennis Male 4/18/1987 1:35 AM 115163 10.125 \n", + "6 Ruby Female 8/17/1987 4:20 PM 65476 10.012 \n", + "7 NaN Female 7/20/2015 10:43 AM 45906 11.598 \n", + "8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n", + "9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n", + "10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n", + "11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n", + "12 Brandon Male 12/1/1980 1:08 AM 112807 17.492 \n", + "13 Gary Male 1/27/2008 11:40 PM 109831 5.831 \n", + "14 Kimberly Female 1/14/1999 7:13 AM 41426 14.543 \n", + "15 Lillian Female 6/5/2016 6:09 AM 59414 1.256 \n", + "16 Jeremy Male 9/21/2010 5:56 AM 90370 7.369 \n", + "17 Shawn Male 12/7/1986 7:45 PM 111737 6.414 \n", + "18 Diana Female 10/23/1981 10:27 AM 132940 19.082 \n", + "19 Donna Female 7/22/2010 3:48 AM 81014 1.894 \n", + "\n", + " Senior Management Team \n", + "0 True Marketing \n", + "1 True NaN \n", + "2 False Finance \n", + "3 True Finance \n", + "4 True Client Services \n", + "5 False Legal \n", + "6 True Product \n", + "7 NaN Finance \n", + "8 True Engineering \n", + "9 True Business Development \n", + "10 True NaN \n", + "11 True Legal \n", + "12 True Human Resources \n", + "13 False Sales \n", + "14 True Finance \n", + "15 False Product \n", + "16 False Human Resources \n", + "17 False Product \n", + "18 False Client Services \n", + "19 False Product " + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ed2" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "prasent null values: 0\n" + ] + } + ], + "source": [ + "#removing entire columns that contain null values:\n", + "ed3=ed2.dropna(axis=1)\n", + "print(\"prasent null values:\",ed3.isnull().sum().sum())" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + ":1: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " ed2.fillna(10,inplace=True)\n" + ] + } + ], + "source": [ + "ed2.fillna(10,inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "First Name 0\n", + "Gender 0\n", + "Start Date 0\n", + "Last Login Time 0\n", + "Salary 0\n", + "Bonus % 0\n", + "Senior Management 0\n", + "Team 0\n", + "dtype: int64" + ] + }, + "execution_count": 97, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ed2.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
First NameGenderStart DateLast Login TimeSalaryBonus %Senior ManagementTeam
0DouglasMale8/6/199312:42 PM973086.945TrueMarketing
1ThomasMale3/31/19966:53 AM619334.170True10
2MariaFemale4/23/199311:17 AM13059011.858FalseFinance
3JerryMale3/4/20051:00 PM1387059.340TrueFinance
4LarryMale1/24/19984:47 PM1010041.389TrueClient Services
5DennisMale4/18/19871:35 AM11516310.125FalseLegal
6RubyFemale8/17/19874:20 PM6547610.012TrueProduct
710Female7/20/201510:43 AM4590611.59810Finance
8AngelaFemale11/22/20056:29 AM9557018.523TrueEngineering
9FrancesFemale8/8/20026:51 AM1398527.524TrueBusiness Development
10LouiseFemale8/12/19809:01 AM6324115.132True10
11JulieFemale10/26/19973:19 PM10250812.637TrueLegal
12BrandonMale12/1/19801:08 AM11280717.492TrueHuman Resources
13GaryMale1/27/200811:40 PM1098315.831FalseSales
14KimberlyFemale1/14/19997:13 AM4142614.543TrueFinance
15LillianFemale6/5/20166:09 AM594141.256FalseProduct
16JeremyMale9/21/20105:56 AM903707.369FalseHuman Resources
17ShawnMale12/7/19867:45 PM1117376.414FalseProduct
18DianaFemale10/23/198110:27 AM13294019.082FalseClient Services
19DonnaFemale7/22/20103:48 AM810141.894FalseProduct
\n", + "
" + ], + "text/plain": [ + " First Name Gender Start Date Last Login Time Salary Bonus % \\\n", + "0 Douglas Male 8/6/1993 12:42 PM 97308 6.945 \n", + "1 Thomas Male 3/31/1996 6:53 AM 61933 4.170 \n", + "2 Maria Female 4/23/1993 11:17 AM 130590 11.858 \n", + "3 Jerry Male 3/4/2005 1:00 PM 138705 9.340 \n", + "4 Larry Male 1/24/1998 4:47 PM 101004 1.389 \n", + "5 Dennis Male 4/18/1987 1:35 AM 115163 10.125 \n", + "6 Ruby Female 8/17/1987 4:20 PM 65476 10.012 \n", + "7 10 Female 7/20/2015 10:43 AM 45906 11.598 \n", + "8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n", + "9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n", + "10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n", + "11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n", + "12 Brandon Male 12/1/1980 1:08 AM 112807 17.492 \n", + "13 Gary Male 1/27/2008 11:40 PM 109831 5.831 \n", + "14 Kimberly Female 1/14/1999 7:13 AM 41426 14.543 \n", + "15 Lillian Female 6/5/2016 6:09 AM 59414 1.256 \n", + "16 Jeremy Male 9/21/2010 5:56 AM 90370 7.369 \n", + "17 Shawn Male 12/7/1986 7:45 PM 111737 6.414 \n", + "18 Diana Female 10/23/1981 10:27 AM 132940 19.082 \n", + "19 Donna Female 7/22/2010 3:48 AM 81014 1.894 \n", + "\n", + " Senior Management Team \n", + "0 True Marketing \n", + "1 True 10 \n", + "2 False Finance \n", + "3 True Finance \n", + "4 True Client Services \n", + "5 False Legal \n", + "6 True Product \n", + "7 10 Finance \n", + "8 True Engineering \n", + "9 True Business Development \n", + "10 True 10 \n", + "11 True Legal \n", + "12 True Human Resources \n", + "13 False Sales \n", + "14 True Finance \n", + "15 False Product \n", + "16 False Human Resources \n", + "17 False Product \n", + "18 False Client Services \n", + "19 False Product " + ] + }, + "execution_count": 98, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ed2" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
First NameGenderStart DateLast Login TimeSalaryBonus %Senior ManagementTeam
0DouglasMale8/6/199312:42 PM973086.945TrueMarketing
1ThomasMale3/31/19966:53 AM619334.170TrueNaN
2MariaFemale4/23/199311:17 AM13059011.858FalseFinance
3JerryMale3/4/20051:00 PM1387059.340TrueFinance
4LarryMale1/24/19984:47 PM1010041.389TrueClient Services
5DennisMale4/18/19871:35 AM11516310.125FalseLegal
6RubyFemale8/17/19874:20 PM6547610.012TrueProduct
7NaNFemale7/20/201510:43 AM4590611.598NaNFinance
8AngelaFemale11/22/20056:29 AM9557018.523TrueEngineering
9FrancesFemale8/8/20026:51 AM1398527.524TrueBusiness Development
10LouiseFemale8/12/19809:01 AM6324115.132TrueNaN
11JulieFemale10/26/19973:19 PM10250812.637TrueLegal
12BrandonMale12/1/19801:08 AM11280717.492TrueHuman Resources
13GaryMale1/27/200811:40 PM1098315.831FalseSales
14KimberlyFemale1/14/19997:13 AM4142614.543TrueFinance
15LillianFemale6/5/20166:09 AM594141.256FalseProduct
16JeremyMale9/21/20105:56 AM903707.369FalseHuman Resources
17ShawnMale12/7/19867:45 PM1117376.414FalseProduct
18DianaFemale10/23/198110:27 AM13294019.082FalseClient Services
19DonnaFemale7/22/20103:48 AM810141.894FalseProduct
\n", + "
" + ], + "text/plain": [ + " First Name Gender Start Date Last Login Time Salary Bonus % \\\n", + "0 Douglas Male 8/6/1993 12:42 PM 97308 6.945 \n", + "1 Thomas Male 3/31/1996 6:53 AM 61933 4.170 \n", + "2 Maria Female 4/23/1993 11:17 AM 130590 11.858 \n", + "3 Jerry Male 3/4/2005 1:00 PM 138705 9.340 \n", + "4 Larry Male 1/24/1998 4:47 PM 101004 1.389 \n", + "5 Dennis Male 4/18/1987 1:35 AM 115163 10.125 \n", + "6 Ruby Female 8/17/1987 4:20 PM 65476 10.012 \n", + "7 NaN Female 7/20/2015 10:43 AM 45906 11.598 \n", + "8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n", + "9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n", + "10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n", + "11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n", + "12 Brandon Male 12/1/1980 1:08 AM 112807 17.492 \n", + "13 Gary Male 1/27/2008 11:40 PM 109831 5.831 \n", + "14 Kimberly Female 1/14/1999 7:13 AM 41426 14.543 \n", + "15 Lillian Female 6/5/2016 6:09 AM 59414 1.256 \n", + "16 Jeremy Male 9/21/2010 5:56 AM 90370 7.369 \n", + "17 Shawn Male 12/7/1986 7:45 PM 111737 6.414 \n", + "18 Diana Female 10/23/1981 10:27 AM 132940 19.082 \n", + "19 Donna Female 7/22/2010 3:48 AM 81014 1.894 \n", + "\n", + " Senior Management Team \n", + "0 True Marketing \n", + "1 True NaN \n", + "2 False Finance \n", + "3 True Finance \n", + "4 True Client Services \n", + "5 False Legal \n", + "6 True Product \n", + "7 NaN Finance \n", + "8 True Engineering \n", + "9 True Business Development \n", + "10 True NaN \n", + "11 True Legal \n", + "12 True Human Resources \n", + "13 False Sales \n", + "14 True Finance \n", + "15 False Product \n", + "16 False Human Resources \n", + "17 False Product \n", + "18 False Client Services \n", + "19 False Product " + ] + }, + "execution_count": 118, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ed5=Employ.head(20)\n", + "ed5" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [ + { + "data": { + 
"text/plain": [ + "First Name 0\n", + "Gender 0\n", + "Start Date 0\n", + "Last Login Time 0\n", + "Salary 0\n", + "Bonus % 0\n", + "Senior Management 0\n", + "Team 0\n", + "dtype: int64" + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ed2.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datestudents
010/9/202010
111/09/202020
212/09/202030
\n", + "
" + ], + "text/plain": [ + " date students\n", + "0 10/9/2020 10\n", + "1 11/09/2020 20\n", + "2 12/09/2020 30" + ] + }, + "execution_count": 120, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "date" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "date object\n", + "students int64\n", + "dtype: object" + ] + }, + "execution_count": 124, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "date.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
checkstudents
010/9/202010
111/09/202020
212/09/202030
\n", + "
" + ], + "text/plain": [ + " check students\n", + "0 10/9/2020 10\n", + "1 11/09/2020 20\n", + "2 12/09/2020 30" + ] + }, + "execution_count": 131, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#rename the 'date' column to 'check':\n", + "date.rename(columns={'date':'check'})" + ] + }, + { + "cell_type": "code", + "execution_count": 134, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datestudents
212/09/202030
111/09/202020
010/9/202010
\n", + "
" + ], + "text/plain": [ + " date students\n", + "2 12/09/2020 30\n", + "1 11/09/2020 20\n", + "0 10/9/2020 10" + ] + }, + "execution_count": 134, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "date.sort_values('students',ascending=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0\n", + "0 1\n", + "1 2\n", + "2 3\n", + "3 4\n", + "4 5\n", + "5 6\n" + ] + } + ], + "source": [ + "#creating the data frame:\n", + "import pandas as pd\n", + "data=[1,2,3,4,5,6]\n", + "d_frame=pd.DataFrame(data)\n", + "print(d_frame)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0 1 2\n", + "x std1 std2 std3\n", + "y azar ameer varun\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "arr=np.array([[\"std1\",\"std2\",\"std3\"],[\"azar\",\"ameer\",\"varun\"]])\n", + "d_frame2=pd.DataFrame(arr,index=['x','y'])\n", + "print(d_frame2)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
xFalse
yFalse
\n", + "
" + ], + "text/plain": [ + " 0\n", + "x False\n", + "y False" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d_frame2.isnull()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
count6.000000
mean3.500000
std1.870829
min1.000000
25%2.250000
50%3.500000
75%4.750000
max6.000000
\n", + "
" + ], + "text/plain": [ + " 0\n", + "count 6.000000\n", + "mean 3.500000\n", + "std 1.870829\n", + "min 1.000000\n", + "25% 2.250000\n", + "50% 3.500000\n", + "75% 4.750000\n", + "max 6.000000" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d_frame.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 3.5\n", + "dtype: float64" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d_frame.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 6\n", + "dtype: int64" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d_frame.count()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 6\n", + "dtype: int64" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d_frame.max()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 3.5\n", + "dtype: float64" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d_frame.median()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1.870829\n", + "dtype: float64" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d_frame.std()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 3.5\n", + "dtype: float64" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "d_frame.apply(np.mean)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/y8/34wjlypd37q4zn8rwhg1vsqc0000gn/T/ipykernel_7089/515644098.py:1: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n", + " data=np.array([[1,2,3,4,5,6],[1,2,3,4,5,6,7]])\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
0[1, 2, 3, 4, 5, 6]
1[1, 2, 3, 4, 5, 6, 7]
\n", + "
" + ], + "text/plain": [ + " 0\n", + "0 [1, 2, 3, 4, 5, 6]\n", + "1 [1, 2, 3, 4, 5, 6, 7]" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data=np.array([[1,2,3,4,5,6],[1,2,3,4,5,6,7]])\n", + "data_frame=pd.DataFrame(data)\n", + "data_frame" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}