{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "daf46b53-319f-4973-9bb6-664135dd328e", "metadata": {}, "outputs": [], "source": [ "import pandas as pd, spacy, nltk, numpy as np, re, ssl" ] }, { "cell_type": "code", "execution_count": 56, "id": "3cae7a11-7696-40fc-967e-7ecafcb2b0da", "metadata": {}, "outputs": [], "source": [ "df = pd.read_excel(\"Assets/Countries/DataPanelWHR2021C2.xls\")" ] }, { "cell_type": "code", "execution_count": 57, "id": "c1ebf3f3-1d38-4919-b60a-dc15e7bf907b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CountryyearLife LadderLog GDP per capitaSocial supportHealthy life expectancy at birthFreedom to make life choicesGenerosityPerceptions of corruptionPositive affectNegative affect
0Afghanistan20083.7235907.3701000.45066250.7999990.7181140.1676400.8816860.5176370.258195
1Afghanistan20094.4017787.5399720.55230851.2000010.6788960.1900990.8500350.5839260.237092
2Afghanistan20104.7583817.6467090.53907551.5999980.6001270.1205900.7067660.6182650.275324
3Afghanistan20113.8317197.6195320.52110451.9199980.4959010.1624270.7311090.6113870.267175
4Afghanistan20123.7829387.7054790.52063752.2400020.5309350.2360320.7756200.7103850.267919
\n", "
" ], "text/plain": [ " Country year Life Ladder Log GDP per capita Social support \\\n", "0 Afghanistan 2008 3.723590 7.370100 0.450662 \n", "1 Afghanistan 2009 4.401778 7.539972 0.552308 \n", "2 Afghanistan 2010 4.758381 7.646709 0.539075 \n", "3 Afghanistan 2011 3.831719 7.619532 0.521104 \n", "4 Afghanistan 2012 3.782938 7.705479 0.520637 \n", "\n", " Healthy life expectancy at birth Freedom to make life choices Generosity \\\n", "0 50.799999 0.718114 0.167640 \n", "1 51.200001 0.678896 0.190099 \n", "2 51.599998 0.600127 0.120590 \n", "3 51.919998 0.495901 0.162427 \n", "4 52.240002 0.530935 0.236032 \n", "\n", " Perceptions of corruption Positive affect Negative affect \n", "0 0.881686 0.517637 0.258195 \n", "1 0.850035 0.583926 0.237092 \n", "2 0.706766 0.618265 0.275324 \n", "3 0.731109 0.611387 0.267175 \n", "4 0.775620 0.710385 0.267919 " ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 59, "id": "a1d054e6-8ca7-4675-913e-b0b500afe105", "metadata": {}, "outputs": [], "source": [ "df_sorted = df.sort_values(by=['year'], ascending = False)" ] }, { "cell_type": "code", "execution_count": 60, "id": "42d08d97-fa68-40dc-9cfd-b0aa8acbb838", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CountryyearLife LadderLog GDP per capitaSocial supportHealthy life expectancy at birthFreedom to make life choicesGenerosityPerceptions of corruptionPositive affectNegative affect
1948Zimbabwe20203.1598027.8287570.71724356.7999990.643303-0.0086960.7885230.7025730.345736
174Benin20204.4077468.1022920.50663655.0999980.783115-0.0834890.5318840.6085850.304512
1835United Kingdom20206.79817710.6258110.92935372.6999970.8846240.2025080.4902040.7581640.224655
1394Philippines20205.0795859.0614430.78114062.0999980.932042-0.1155430.7442840.8035620.326889
785Iraq20204.7851659.1671860.70784761.4000020.700215-0.0207480.8491090.6444640.531539
\n", "
" ], "text/plain": [ " Country year Life Ladder Log GDP per capita Social support \\\n", "1948 Zimbabwe 2020 3.159802 7.828757 0.717243 \n", "174 Benin 2020 4.407746 8.102292 0.506636 \n", "1835 United Kingdom 2020 6.798177 10.625811 0.929353 \n", "1394 Philippines 2020 5.079585 9.061443 0.781140 \n", "785 Iraq 2020 4.785165 9.167186 0.707847 \n", "\n", " Healthy life expectancy at birth Freedom to make life choices \\\n", "1948 56.799999 0.643303 \n", "174 55.099998 0.783115 \n", "1835 72.699997 0.884624 \n", "1394 62.099998 0.932042 \n", "785 61.400002 0.700215 \n", "\n", " Generosity Perceptions of corruption Positive affect Negative affect \n", "1948 -0.008696 0.788523 0.702573 0.345736 \n", "174 -0.083489 0.531884 0.608585 0.304512 \n", "1835 0.202508 0.490204 0.758164 0.224655 \n", "1394 -0.115543 0.744284 0.803562 0.326889 \n", "785 -0.020748 0.849109 0.644464 0.531539 " ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_sorted.head()" ] }, { "cell_type": "code", "execution_count": 61, "id": "abb8954c-106f-42d1-bf2a-0200b8927306", "metadata": {}, "outputs": [], "source": [ "df_dedup = df_sorted.drop_duplicates(subset=['Country'])" ] }, { "cell_type": "code", "execution_count": 62, "id": "969f5fcf-5dc6-4ce3-93f7-0f35473f3c73", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CountryyearLife LadderLog GDP per capitaSocial supportHealthy life expectancy at birthFreedom to make life choicesGenerosityPerceptions of corruptionPositive affectNegative affect
1948Zimbabwe20203.1598027.8287570.71724356.7999990.643303-0.0086960.7885230.7025730.345736
174Benin20204.4077468.1022920.50663655.0999980.783115-0.0834890.5318840.6085850.304512
1835United Kingdom20206.79817710.6258110.92935372.6999970.8846240.2025080.4902040.7581640.224655
1394Philippines20205.0795859.0614430.78114062.0999980.932042-0.1155430.7442840.8035620.326889
785Iraq20204.7851659.1671860.70784761.4000020.700215-0.0207480.8491090.6444640.531539
\n", "
" ], "text/plain": [ " Country year Life Ladder Log GDP per capita Social support \\\n", "1948 Zimbabwe 2020 3.159802 7.828757 0.717243 \n", "174 Benin 2020 4.407746 8.102292 0.506636 \n", "1835 United Kingdom 2020 6.798177 10.625811 0.929353 \n", "1394 Philippines 2020 5.079585 9.061443 0.781140 \n", "785 Iraq 2020 4.785165 9.167186 0.707847 \n", "\n", " Healthy life expectancy at birth Freedom to make life choices \\\n", "1948 56.799999 0.643303 \n", "174 55.099998 0.783115 \n", "1835 72.699997 0.884624 \n", "1394 62.099998 0.932042 \n", "785 61.400002 0.700215 \n", "\n", " Generosity Perceptions of corruption Positive affect Negative affect \n", "1948 -0.008696 0.788523 0.702573 0.345736 \n", "174 -0.083489 0.531884 0.608585 0.304512 \n", "1835 0.202508 0.490204 0.758164 0.224655 \n", "1394 -0.115543 0.744284 0.803562 0.326889 \n", "785 -0.020748 0.849109 0.644464 0.531539 " ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_dedup.head()" ] }, { "cell_type": "code", "execution_count": 63, "id": "d080546c-4698-4edd-8b76-e3c94aee9862", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1949" ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df_sorted)" ] }, { "cell_type": "code", "execution_count": 64, "id": "6a817f5c-e871-4d69-9368-00a90efc6007", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "166" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df_dedup)" ] }, { "cell_type": "code", "execution_count": 65, "id": "d6640a42-064e-4b31-b89d-de4f7d4240a3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CountryContinent
0AlgeriaAfrica
1AngolaAfrica
2BeninAfrica
3BotswanaAfrica
4BurkinaAfrica
\n", "
" ], "text/plain": [ " Country Continent\n", "0 Algeria Africa\n", "1 Angola Africa\n", "2 Benin Africa\n", "3 Botswana Africa\n", "4 Burkina Africa" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_csv = pd.read_csv(\"Assets/Countries/countries.csv\")\n", "df_csv.head()" ] }, { "cell_type": "code", "execution_count": 18, "id": "a6e6f52e-cff7-4d78-b630-e71e07fa8842", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "194" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df_csv)" ] }, { "cell_type": "code", "execution_count": 66, "id": "edaae740-75bf-42a2-afa6-ebbbbf50d792", "metadata": {}, "outputs": [], "source": [ "c1 = df_dedup[\"Country\"]\n", "c2 = list(df_csv[\"Country\"])\n", "c3 = [(country, country in c2) for country in c1]" ] }, { "cell_type": "code", "execution_count": 67, "id": "5e86b02e-e5a3-4eaf-b045-74f0d0cfea08", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 67, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\"Zimbabwe\" in c2" ] }, { "cell_type": "code", "execution_count": 68, "id": "921765a7-6f40-4d6a-9403-f5f8d8f26a65", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('Zimbabwe', True),\n", " ('Benin', True),\n", " ('United Kingdom', True),\n", " ('Philippines', True),\n", " ('Iraq', True),\n", " ('Belgium', True),\n", " ('Iran', True),\n", " ('Poland', True),\n", " ('Portugal', True),\n", " ('India', True),\n", " ('Israel', True),\n", " ('Iceland', True),\n", " ('United Arab Emirates', True),\n", " ('Hungary', True),\n", " ('Hong Kong S.A.R. 
of China', False),\n", " ('Bolivia', True),\n", " ('Russia', False),\n", " ('Saudi Arabia', True),\n", " ('Ireland', True),\n", " ('Italy', True),\n", " ('Ukraine', True),\n", " ('Kenya', True),\n", " ('Latvia', True),\n", " ('Laos', True),\n", " ('Nigeria', True),\n", " ('Austria', True),\n", " ('Kyrgyzstan', True),\n", " ('North Macedonia', False),\n", " ('Kosovo', False),\n", " ('Norway', True),\n", " ('United States', False),\n", " ('Kazakhstan', True),\n", " ('Bahrain', True),\n", " ('Uruguay', True),\n", " ('Jordan', True),\n", " ('Japan', True),\n", " ('Bangladesh', True),\n", " ('Ivory Coast', True),\n", " ('Bosnia and Herzegovina', True),\n", " ('Greece', True),\n", " ('Australia', True),\n", " ('Croatia', True),\n", " ('Tunisia', True),\n", " ('Spain', True),\n", " ('Denmark', True),\n", " ('Cameroon', True),\n", " ('Czech Republic', False),\n", " ('Cyprus', True),\n", " ('Sweden', True),\n", " ('Canada', True),\n", " ('South Korea', False),\n", " ('Switzerland', True),\n", " ('Thailand', True),\n", " ('Taiwan Province of China', False),\n", " ('Colombia', True),\n", " ('Tajikistan', True),\n", " ('Tanzania', True),\n", " ('China', True),\n", " ('Dominican Republic', True),\n", " ('Cambodia', True),\n", " ('Ghana', True),\n", " ('Slovakia', True),\n", " ('Serbia', True),\n", " ('Uganda', True),\n", " ('Germany', True),\n", " ('Georgia', True),\n", " ('Brazil', True),\n", " ('France', True),\n", " ('Bulgaria', True),\n", " ('Finland', True),\n", " ('Ecuador', True),\n", " ('Ethiopia', True),\n", " ('Slovenia', True),\n", " ('Estonia', True),\n", " ('El Salvador', True),\n", " ('Turkey', True),\n", " ('South Africa', True),\n", " ('Egypt', True),\n", " ('Venezuela', True),\n", " ('Chile', True),\n", " ('Lithuania', True),\n", " ('Moldova', True),\n", " ('Netherlands', True),\n", " ('Mongolia', True),\n", " ('Mauritius', True),\n", " ('Mexico', True),\n", " ('New Zealand', True),\n", " ('Namibia', True),\n", " ('Myanmar', False),\n", " ('Malta', True),\n", " 
('Zambia', True),\n", " ('Argentina', True),\n", " ('Morocco', True),\n", " ('Albania', True),\n", " ('Montenegro', True),\n", " ('Guinea', True),\n", " ('Yemen', True),\n", " ('Guatemala', True),\n", " ('Malaysia', True),\n", " ('Rwanda', True),\n", " ('Sri Lanka', True),\n", " ('Malawi', True),\n", " ('Nepal', True),\n", " ('Swaziland', True),\n", " ('Romania', True),\n", " ('Senegal', True),\n", " ('Honduras', True),\n", " ('Mali', True),\n", " ('Mauritania', True),\n", " ('Turkmenistan', True),\n", " ('Burkina Faso', False),\n", " ('Algeria', True),\n", " ('Botswana', True),\n", " ('Sierra Leone', True),\n", " ('Mozambique', True),\n", " ('Singapore', True),\n", " ('Gambia', True),\n", " ('Gabon', True),\n", " ('Indonesia', True),\n", " ('Azerbaijan', True),\n", " ('Chad', True),\n", " ('Liberia', True),\n", " ('Libya', True),\n", " ('Pakistan', True),\n", " ('Armenia', True),\n", " ('Comoros', True),\n", " ('Afghanistan', True),\n", " ('Palestinian Territories', False),\n", " ('Nicaragua', True),\n", " ('Niger', True),\n", " ('Lebanon', True),\n", " ('Lesotho', True),\n", " ('Uzbekistan', True),\n", " ('North Cyprus', False),\n", " ('Kuwait', True),\n", " ('Congo (Brazzaville)', False),\n", " ('Peru', True),\n", " ('Vietnam', True),\n", " ('Togo', True),\n", " ('Belarus', True),\n", " ('Madagascar', True),\n", " ('Costa Rica', True),\n", " ('Luxembourg', True),\n", " ('Panama', True),\n", " ('Paraguay', True),\n", " ('Jamaica', True),\n", " ('Maldives', True),\n", " ('Haiti', True),\n", " ('Burundi', True),\n", " ('Congo (Kinshasa)', False),\n", " ('Central African Republic', True),\n", " ('Trinidad and Tobago', True),\n", " ('South Sudan', True),\n", " ('Somalia', True),\n", " ('Syria', True),\n", " ('Qatar', True),\n", " ('Bhutan', True),\n", " ('Sudan', True),\n", " ('Angola', True),\n", " ('Belize', True),\n", " ('Suriname', True),\n", " ('Somaliland region', False),\n", " ('Oman', True),\n", " ('Djibouti', True),\n", " ('Guyana', True),\n", " ('Cuba', 
True)]" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c3" ] }, { "cell_type": "code", "execution_count": 37, "id": "ff74b057-7281-4ab2-82c5-367e949fbbed", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Hong Kong S.A.R. of China',\n", " 'Russia',\n", " 'North Macedonia',\n", " 'Kosovo',\n", " 'United States',\n", " 'Czech Republic',\n", " 'South Korea',\n", " 'Taiwan Province of China',\n", " 'Myanmar',\n", " 'Burkina Faso',\n", " 'Palestinian Territories',\n", " 'North Cyprus',\n", " 'Congo (Brazzaville)',\n", " 'Congo (Kinshasa)',\n", " 'Somaliland region']" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# c3 holds (country, found) pairs; collect the names whose flag is False,\n", "# i.e. the ones with no exact match in the countries.csv name list.\n", "missing = [country for country, found in c3 if not found]\n", "# Number of matched names. Kept as a variable only: a bare `num` line in the\n", "# middle of a cell displays nothing, since only the last expression is shown.\n", "num = len(c3) - len(missing)\n", "missing" ] }, { "cell_type": "code", "execution_count": 44, "id": "50f20260-3ed6-4f4e-a558-e3c6374ecb26", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Africa'" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_csv.loc[df_csv['Country'] == \"Madagascar\", 'Continent'].iloc[0]" ] }, { "cell_type": "code", "execution_count": 50, "id": "9dfa66ef-1c2b-4893-8993-107c2e02a2c8", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Country nameyearLife LadderLog GDP per capitaSocial supportHealthy life expectancy at birthFreedom to make life choicesGenerosityPerceptions of corruptionPositive affectNegative affectContinent
1948Zimbabwe20203.1598027.8287570.71724356.7999990.643303-0.0086960.7885230.7025730.345736<pandas.core.indexing._iLocIndexer object at 0...
174Benin20204.4077468.1022920.50663655.0999980.783115-0.0834890.5318840.6085850.304512<pandas.core.indexing._iLocIndexer object at 0...
1835United Kingdom20206.79817710.6258110.92935372.6999970.8846240.2025080.4902040.7581640.224655<pandas.core.indexing._iLocIndexer object at 0...
1394Philippines20205.0795859.0614430.78114062.0999980.932042-0.1155430.7442840.8035620.326889<pandas.core.indexing._iLocIndexer object at 0...
785Iraq20204.7851659.1671860.70784761.4000020.700215-0.0207480.8491090.6444640.531539<pandas.core.indexing._iLocIndexer object at 0...
\n", "
" ], "text/plain": [ " Country name year Life Ladder Log GDP per capita Social support \\\n", "1948 Zimbabwe 2020 3.159802 7.828757 0.717243 \n", "174 Benin 2020 4.407746 8.102292 0.506636 \n", "1835 United Kingdom 2020 6.798177 10.625811 0.929353 \n", "1394 Philippines 2020 5.079585 9.061443 0.781140 \n", "785 Iraq 2020 4.785165 9.167186 0.707847 \n", "\n", " Healthy life expectancy at birth Freedom to make life choices \\\n", "1948 56.799999 0.643303 \n", "174 55.099998 0.783115 \n", "1835 72.699997 0.884624 \n", "1394 62.099998 0.932042 \n", "785 61.400002 0.700215 \n", "\n", " Generosity Perceptions of corruption Positive affect Negative affect \\\n", "1948 -0.008696 0.788523 0.702573 0.345736 \n", "174 -0.083489 0.531884 0.608585 0.304512 \n", "1835 0.202508 0.490204 0.758164 0.224655 \n", "1394 -0.115543 0.744284 0.803562 0.326889 \n", "785 -0.020748 0.849109 0.644464 0.531539 \n", "\n", " Continent \n", "1948 \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CountryyearLife LadderLog GDP per capitaSocial supportHealthy life expectancy at birthFreedom to make life choicesGenerosityPerceptions of corruptionPositive affectNegative affectContinent
0Zimbabwe20203.1598027.8287570.71724356.7999990.643303-0.0086960.7885230.7025730.345736Africa
1Benin20204.4077468.1022920.50663655.0999980.783115-0.0834890.5318840.6085850.304512Africa
2United Kingdom20206.79817710.6258110.92935372.6999970.8846240.2025080.4902040.7581640.224655Europe
3Philippines20205.0795859.0614430.78114062.0999980.932042-0.1155430.7442840.8035620.326889Asia
4Iraq20204.7851659.1671860.70784761.4000020.700215-0.0207480.8491090.6444640.531539Asia
\n", "" ], "text/plain": [ " Country year Life Ladder Log GDP per capita Social support \\\n", "0 Zimbabwe 2020 3.159802 7.828757 0.717243 \n", "1 Benin 2020 4.407746 8.102292 0.506636 \n", "2 United Kingdom 2020 6.798177 10.625811 0.929353 \n", "3 Philippines 2020 5.079585 9.061443 0.781140 \n", "4 Iraq 2020 4.785165 9.167186 0.707847 \n", "\n", " Healthy life expectancy at birth Freedom to make life choices Generosity \\\n", "0 56.799999 0.643303 -0.008696 \n", "1 55.099998 0.783115 -0.083489 \n", "2 72.699997 0.884624 0.202508 \n", "3 62.099998 0.932042 -0.115543 \n", "4 61.400002 0.700215 -0.020748 \n", "\n", " Perceptions of corruption Positive affect Negative affect Continent \n", "0 0.788523 0.702573 0.345736 Africa \n", "1 0.531884 0.608585 0.304512 Africa \n", "2 0.490204 0.758164 0.224655 Europe \n", "3 0.744284 0.803562 0.326889 Asia \n", "4 0.849109 0.644464 0.531539 Asia " ] }, "execution_count": 79, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_cont.head()" ] }, { "cell_type": "code", "execution_count": 81, "id": "fb26fc2f-f591-4e66-9357-0928c2c46e89", "metadata": {}, "outputs": [], "source": [ "# I updated the name of the output so that I don't accidentally overwrite the manual work I did at the end to add in the last few outliers.\n", "#df_cont.to_csv(\"Assets/Countries/base-combined-countries.csv\")" ] }, { "cell_type": "code", "execution_count": 83, "id": "445a79b2-0023-4812-b606-1ff9cb7720e7", "metadata": {}, "outputs": [], "source": [ "# Reverse check: left-join the per-country rows in df_dedup onto the\n", "# countries.csv list, matching on the country name.\n", "continent_lookup = df_csv.set_index('Country')\n", "whr_latest = df_dedup.set_index('Country')\n", "df3 = continent_lookup.join(whr_latest, on='Country', how='left')" ] }, { "cell_type": "code", "execution_count": 87, "id": "59c3d6bb-11ea-4b4f-9a9e-d9b58561e8f2", "metadata": {}, "outputs": [], "source": [ "# Keep only the rows whose `year` is null after the left join, i.e. the\n", "# countries.csv names that found no row in df_dedup.\n", "df3 = df3[df3['year'].isna()]" ] }, { "cell_type": "code", "execution_count": 88, "id": "3b76dce1-a02f-4b09-bc44-b0e28271bc56", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ContinentyearLife LadderLog GDP per capitaSocial supportHealthy life expectancy at birthFreedom to make life choicesGenerosityPerceptions of corruptionPositive affectNegative affect
Country
BurkinaAfricaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Cape VerdeAfricaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
CongoAfricaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Congo, Democratic Republic ofAfricaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Equatorial GuineaAfricaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
EritreaAfricaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Guinea-BissauAfricaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Sao Tome and PrincipeAfricaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
SeychellesAfricaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
BruneiAsiaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Burma (Myanmar)AsiaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
East TimorAsiaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Korea, NorthAsiaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Korea, SouthAsiaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Russian FederationAsiaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
AndorraEuropeNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
CZEuropeNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
LiechtensteinEuropeNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
MacedoniaEuropeNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
MonacoEuropeNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
San MarinoEuropeNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Vatican CityEuropeNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Antigua and BarbudaNorth AmericaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
BahamasNorth AmericaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
BarbadosNorth AmericaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
DominicaNorth AmericaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
GrenadaNorth AmericaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Saint Kitts and NevisNorth AmericaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Saint LuciaNorth AmericaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Saint Vincent and the GrenadinesNorth AmericaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
USNorth AmericaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
FijiOceaniaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
KiribatiOceaniaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Marshall IslandsOceaniaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
MicronesiaOceaniaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
NauruOceaniaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
PalauOceaniaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Papua New GuineaOceaniaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
SamoaOceaniaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Solomon IslandsOceaniaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
TongaOceaniaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
TuvaluOceaniaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
VanuatuOceaniaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " Continent year Life Ladder \\\n", "Country \n", "Burkina Africa NaN NaN \n", "Cape Verde Africa NaN NaN \n", "Congo Africa NaN NaN \n", "Congo, Democratic Republic of Africa NaN NaN \n", "Equatorial Guinea Africa NaN NaN \n", "Eritrea Africa NaN NaN \n", "Guinea-Bissau Africa NaN NaN \n", "Sao Tome and Principe Africa NaN NaN \n", "Seychelles Africa NaN NaN \n", "Brunei Asia NaN NaN \n", "Burma (Myanmar) Asia NaN NaN \n", "East Timor Asia NaN NaN \n", "Korea, North Asia NaN NaN \n", "Korea, South Asia NaN NaN \n", "Russian Federation Asia NaN NaN \n", "Andorra Europe NaN NaN \n", "CZ Europe NaN NaN \n", "Liechtenstein Europe NaN NaN \n", "Macedonia Europe NaN NaN \n", "Monaco Europe NaN NaN \n", "San Marino Europe NaN NaN \n", "Vatican City Europe NaN NaN \n", "Antigua and Barbuda North America NaN NaN \n", "Bahamas North America NaN NaN \n", "Barbados North America NaN NaN \n", "Dominica North America NaN NaN \n", "Grenada North America NaN NaN \n", "Saint Kitts and Nevis North America NaN NaN \n", "Saint Lucia North America NaN NaN \n", "Saint Vincent and the Grenadines North America NaN NaN \n", "US North America NaN NaN \n", "Fiji Oceania NaN NaN \n", "Kiribati Oceania NaN NaN \n", "Marshall Islands Oceania NaN NaN \n", "Micronesia Oceania NaN NaN \n", "Nauru Oceania NaN NaN \n", "Palau Oceania NaN NaN \n", "Papua New Guinea Oceania NaN NaN \n", "Samoa Oceania NaN NaN \n", "Solomon Islands Oceania NaN NaN \n", "Tonga Oceania NaN NaN \n", "Tuvalu Oceania NaN NaN \n", "Vanuatu Oceania NaN NaN \n", "\n", " Log GDP per capita Social support \\\n", "Country \n", "Burkina NaN NaN \n", "Cape Verde NaN NaN \n", "Congo NaN NaN \n", "Congo, Democratic Republic of NaN NaN \n", "Equatorial Guinea NaN NaN \n", "Eritrea NaN NaN \n", "Guinea-Bissau NaN NaN \n", "Sao Tome and Principe NaN NaN \n", "Seychelles NaN NaN \n", "Brunei NaN NaN \n", "Burma (Myanmar) NaN NaN \n", "East Timor NaN NaN \n", "Korea, North NaN NaN \n", "Korea, South NaN NaN \n", 
"Russian Federation NaN NaN \n", "Andorra NaN NaN \n", "CZ NaN NaN \n", "Liechtenstein NaN NaN \n", "Macedonia NaN NaN \n", "Monaco NaN NaN \n", "San Marino NaN NaN \n", "Vatican City NaN NaN \n", "Antigua and Barbuda NaN NaN \n", "Bahamas NaN NaN \n", "Barbados NaN NaN \n", "Dominica NaN NaN \n", "Grenada NaN NaN \n", "Saint Kitts and Nevis NaN NaN \n", "Saint Lucia NaN NaN \n", "Saint Vincent and the Grenadines NaN NaN \n", "US NaN NaN \n", "Fiji NaN NaN \n", "Kiribati NaN NaN \n", "Marshall Islands NaN NaN \n", "Micronesia NaN NaN \n", "Nauru NaN NaN \n", "Palau NaN NaN \n", "Papua New Guinea NaN NaN \n", "Samoa NaN NaN \n", "Solomon Islands NaN NaN \n", "Tonga NaN NaN \n", "Tuvalu NaN NaN \n", "Vanuatu NaN NaN \n", "\n", " Healthy life expectancy at birth \\\n", "Country \n", "Burkina NaN \n", "Cape Verde NaN \n", "Congo NaN \n", "Congo, Democratic Republic of NaN \n", "Equatorial Guinea NaN \n", "Eritrea NaN \n", "Guinea-Bissau NaN \n", "Sao Tome and Principe NaN \n", "Seychelles NaN \n", "Brunei NaN \n", "Burma (Myanmar) NaN \n", "East Timor NaN \n", "Korea, North NaN \n", "Korea, South NaN \n", "Russian Federation NaN \n", "Andorra NaN \n", "CZ NaN \n", "Liechtenstein NaN \n", "Macedonia NaN \n", "Monaco NaN \n", "San Marino NaN \n", "Vatican City NaN \n", "Antigua and Barbuda NaN \n", "Bahamas NaN \n", "Barbados NaN \n", "Dominica NaN \n", "Grenada NaN \n", "Saint Kitts and Nevis NaN \n", "Saint Lucia NaN \n", "Saint Vincent and the Grenadines NaN \n", "US NaN \n", "Fiji NaN \n", "Kiribati NaN \n", "Marshall Islands NaN \n", "Micronesia NaN \n", "Nauru NaN \n", "Palau NaN \n", "Papua New Guinea NaN \n", "Samoa NaN \n", "Solomon Islands NaN \n", "Tonga NaN \n", "Tuvalu NaN \n", "Vanuatu NaN \n", "\n", " Freedom to make life choices Generosity \\\n", "Country \n", "Burkina NaN NaN \n", "Cape Verde NaN NaN \n", "Congo NaN NaN \n", "Congo, Democratic Republic of NaN NaN \n", "Equatorial Guinea NaN NaN \n", "Eritrea NaN NaN \n", "Guinea-Bissau NaN NaN \n", "Sao 
Tome and Principe NaN NaN \n", "Seychelles NaN NaN \n", "Brunei NaN NaN \n", "Burma (Myanmar) NaN NaN \n", "East Timor NaN NaN \n", "Korea, North NaN NaN \n", "Korea, South NaN NaN \n", "Russian Federation NaN NaN \n", "Andorra NaN NaN \n", "CZ NaN NaN \n", "Liechtenstein NaN NaN \n", "Macedonia NaN NaN \n", "Monaco NaN NaN \n", "San Marino NaN NaN \n", "Vatican City NaN NaN \n", "Antigua and Barbuda NaN NaN \n", "Bahamas NaN NaN \n", "Barbados NaN NaN \n", "Dominica NaN NaN \n", "Grenada NaN NaN \n", "Saint Kitts and Nevis NaN NaN \n", "Saint Lucia NaN NaN \n", "Saint Vincent and the Grenadines NaN NaN \n", "US NaN NaN \n", "Fiji NaN NaN \n", "Kiribati NaN NaN \n", "Marshall Islands NaN NaN \n", "Micronesia NaN NaN \n", "Nauru NaN NaN \n", "Palau NaN NaN \n", "Papua New Guinea NaN NaN \n", "Samoa NaN NaN \n", "Solomon Islands NaN NaN \n", "Tonga NaN NaN \n", "Tuvalu NaN NaN \n", "Vanuatu NaN NaN \n", "\n", " Perceptions of corruption Positive affect \\\n", "Country \n", "Burkina NaN NaN \n", "Cape Verde NaN NaN \n", "Congo NaN NaN \n", "Congo, Democratic Republic of NaN NaN \n", "Equatorial Guinea NaN NaN \n", "Eritrea NaN NaN \n", "Guinea-Bissau NaN NaN \n", "Sao Tome and Principe NaN NaN \n", "Seychelles NaN NaN \n", "Brunei NaN NaN \n", "Burma (Myanmar) NaN NaN \n", "East Timor NaN NaN \n", "Korea, North NaN NaN \n", "Korea, South NaN NaN \n", "Russian Federation NaN NaN \n", "Andorra NaN NaN \n", "CZ NaN NaN \n", "Liechtenstein NaN NaN \n", "Macedonia NaN NaN \n", "Monaco NaN NaN \n", "San Marino NaN NaN \n", "Vatican City NaN NaN \n", "Antigua and Barbuda NaN NaN \n", "Bahamas NaN NaN \n", "Barbados NaN NaN \n", "Dominica NaN NaN \n", "Grenada NaN NaN \n", "Saint Kitts and Nevis NaN NaN \n", "Saint Lucia NaN NaN \n", "Saint Vincent and the Grenadines NaN NaN \n", "US NaN NaN \n", "Fiji NaN NaN \n", "Kiribati NaN NaN \n", "Marshall Islands NaN NaN \n", "Micronesia NaN NaN \n", "Nauru NaN NaN \n", "Palau NaN NaN \n", "Papua New Guinea NaN NaN \n", "Samoa NaN 
NaN \n", "Solomon Islands NaN NaN \n", "Tonga NaN NaN \n", "Tuvalu NaN NaN \n", "Vanuatu NaN NaN \n", "\n", " Negative affect \n", "Country \n", "Burkina NaN \n", "Cape Verde NaN \n", "Congo NaN \n", "Congo, Democratic Republic of NaN \n", "Equatorial Guinea NaN \n", "Eritrea NaN \n", "Guinea-Bissau NaN \n", "Sao Tome and Principe NaN \n", "Seychelles NaN \n", "Brunei NaN \n", "Burma (Myanmar) NaN \n", "East Timor NaN \n", "Korea, North NaN \n", "Korea, South NaN \n", "Russian Federation NaN \n", "Andorra NaN \n", "CZ NaN \n", "Liechtenstein NaN \n", "Macedonia NaN \n", "Monaco NaN \n", "San Marino NaN \n", "Vatican City NaN \n", "Antigua and Barbuda NaN \n", "Bahamas NaN \n", "Barbados NaN \n", "Dominica NaN \n", "Grenada NaN \n", "Saint Kitts and Nevis NaN \n", "Saint Lucia NaN \n", "Saint Vincent and the Grenadines NaN \n", "US NaN \n", "Fiji NaN \n", "Kiribati NaN \n", "Marshall Islands NaN \n", "Micronesia NaN \n", "Nauru NaN \n", "Palau NaN \n", "Papua New Guinea NaN \n", "Samoa NaN \n", "Solomon Islands NaN \n", "Tonga NaN \n", "Tuvalu NaN \n", "Vanuatu NaN " ] }, "execution_count": 88, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3" ] }, { "cell_type": "markdown", "id": "db01b828-d1b1-4708-b6bd-3b2dbed54746", "metadata": {}, "source": [ "> Note that I updated these in the spreadsheet manually with Excel because it was faster to do it by hand... I should go back when I have time to do it programmatically..." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" } }, "nbformat": 4, "nbformat_minor": 5 }