{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import math\n", "import time" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Table of contents\n", "1. [Loading data from the website](#websiteData)\n", "2. [Loading data locally](#localData)\n", "3. [Dataset creation initial functions](#datasetCreate) \n", " a. [Victory Point (target) dataset](#VPData) \n", " b. [Features dataset](#featuresData)\n", "4. [Speeding check & speed up dataset creation & sense-checking on small batch](#speedUp)\n", "5. [Cleaning & filtering the data](#cleanUp)\n", "6. [Creating the full dataset](#fullDataset)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Loading data from the website (not using this right now - for now we'll just load locally) \n", "But we'll keep this section here for now, commented out. The reason we're not using this is because the jsons are nested, which would require a bit of time to unpack. I'd like to focus on the cleaning / making some form of useful data and modelling, for now. Later we can go back and get more data." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# bigoldlist = ['2013-02.json',\n", "# '2013-03.json',\n", "# '2013-04.json',\n", "# '2013-05.json',\n", "# '2013-06.json',\n", "# '2013-07.json',\n", "# '2013-08.json',\n", "# '2013-09.json',\n", "# '2013-10.json',\n", "# '2013-11.json',\n", "# '2013-12.json',\n", "# '2014-01.json',\n", "# '2014-02.json',\n", "# '2014-03.json',\n", "# '2014-04.json',\n", "# '2014-05.json',\n", "# '2014-06.json',\n", "# '2014-07.json',\n", "# '2014-08.json',\n", "# '2014-09.json',\n", "# '2014-10.json',\n", "# '2014-11.json',\n", "# '2014-12.json',\n", "# '2015-01.json',\n", "# '2015-02.json',\n", "# '2015-03.json',\n", "# '2015-04.json',\n", "# '2015-05.json',\n", "# '2015-06.json',\n", "# '2015-07.json',\n", "# '2015-08.json',\n", "# '2015-09.json',\n", "# '2015-10.json',\n", "# '2015-11.json',\n", "# '2015-12.json',\n", "# '2016-01.json',\n", "# '2016-02.json',\n", "# '2016-03.json',\n", "# '2016-04.json',\n", "# '2016-05.json',\n", "# '2016-06.json',\n", "# '2016-07.json',\n", "# '2016-08.json',\n", "# '2016-09.json',\n", "# '2016-10.json',\n", "# '2016-11.json',\n", "# '2016-12.json',\n", "# '2017-01.json',\n", "# '2017-02.json',\n", "# '2017-03.json',\n", "# '2017-04.json',\n", "# '2017-05.json',\n", "# '2017-06.json',\n", "# '2017-07.json',\n", "# '2017-08.json',\n", "# '2017-09.json',\n", "# '2017-10.json',\n", "# '2017-11.json',\n", "# '2017-12.json',\n", "# '2018-01.json',\n", "# '2018-02.json',\n", "# '2018-03.json',\n", "# '2018-04.json',\n", "# '2018-05.json',\n", "# '2018-06.json',\n", "# '2018-07.json',\n", "# '2018-08.json',\n", "# '2018-09.json',\n", "# '2018-10.json',\n", "# '2018-11.json',\n", "# '2018-12.json',\n", "# '2019-01.json',\n", "# '2019-02.json',\n", "# '2019-03.json',\n", "# '2019-04.json',\n", "# '2019-05.json',\n", "# '2019-06.json',\n", "# '2019-07.json',\n", "# '2019-08.json',\n", "# '2019-09.json',\n", "# '2019-10.json',\n", "# '2019-11.json',\n", "# '2019-12.json',\n", "# '2020-01.json',\n", "# '2020-02.json',\n", "# '2020-03.json',\n", "# '2020-04.json',\n", "# '2020-05.json',\n", "# '2020-06.json',\n", "# '2020-07.json',\n", "# '2020-08.json',\n", "# '2020-09.json',\n", "# '2020-10.json',\n", "# '2020-11.json',\n", "# '2020-12.json',\n", "# '2021-01.json',\n", "# '2021-02.json',\n", "# '2021-03.json',\n", "# '2021-04.json']" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# # Test start of web pipeline (not used)\n", "# url = 'https://terra.snellman.net/data/events/' + bigoldlist[0]\n", "# eventsdata1 = pd.read_json(url)\n", "# print(len(eventsdata1))\n", "# eventsdata1.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# # Load in the data remotely\n", "# for gameset in bigoldlist:\n", "# url = 'https://terra.snellman.net/data/events/' + gameset\n", "# eventsdata = pd.read_json(url)\n", " \n", "# for eachgame in range(len(gameset)):\n", "# singlerow = eventsdata.iloc[[eachgame]]\n", "# newcol = get_vp_from_game(gameEventRow)\n", "# vpdf = vpdf.append(newdf, ignore_index=True) " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Loading in the data Locally " ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "folderlocation = \"D:/PycharmProjects/TerraBot/terra-mystica\"\n", "gameevents = pd.read_csv(f'{folderlocation}/game_events.csv')\n", "games = pd.read_csv(f'{folderlocation}/games.csv')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## EDA" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of game events: 73419074\n", "Number of games in gameevents: 115612\n", "len of games: 115612\n" ] } ], "source": [ "# get some more stats\n", "gameslist = list(pd.unique(gameevents['game']))\n", "print(f'Number of game events: {len(gameevents)}')\n", "print(f\"Number of games in gameevents: {len(gameslist)}\")\n", "print(f'len of games: {len(games)}')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
base_mapgamelast_updateplayer_count
0126fe960806d587c78546b30f1a90853b1ada46800000000012015-07-22 05:15:512
1be8f6ebf549404d015547152d5f2a1906ae8dd900506152015-07-13 09:45:004
2126fe960806d587c78546b30f1a90853b1ada46805122015-07-03 04:17:314
3126fe960806d587c78546b30f1a90853b1ada4680515012015-07-16 21:00:545
495a66999127893f5925a5f591d54f8bcb9a670e60603032015-07-02 05:20:194
...............
115607126fe960806d587c78546b30f1a90853b1ada468Youreacrookcaptianhook2014-09-04 05:04:453
115608126fe960806d587c78546b30f1a90853b1ada468YourMomIsAChaosMagician2014-09-02 00:23:434
115609126fe960806d587c78546b30f1a90853b1ada468ytuwertqwtr2014-09-28 18:12:352
115610126fe960806d587c78546b30f1a90853b1ada468yuertyqert2014-09-27 19:01:172
115611126fe960806d587c78546b30f1a90853b1ada468ZeicheMasZeuchnis2014-09-10 20:35:323
\n", "

115612 rows × 4 columns

\n", "
" ], "text/plain": [ " base_map game \\\n", "0 126fe960806d587c78546b30f1a90853b1ada468 0000000001 \n", "1 be8f6ebf549404d015547152d5f2a1906ae8dd90 050615 \n", "2 126fe960806d587c78546b30f1a90853b1ada468 0512 \n", "3 126fe960806d587c78546b30f1a90853b1ada468 051501 \n", "4 95a66999127893f5925a5f591d54f8bcb9a670e6 060303 \n", "... ... ... \n", "115607 126fe960806d587c78546b30f1a90853b1ada468 Youreacrookcaptianhook \n", "115608 126fe960806d587c78546b30f1a90853b1ada468 YourMomIsAChaosMagician \n", "115609 126fe960806d587c78546b30f1a90853b1ada468 ytuwertqwtr \n", "115610 126fe960806d587c78546b30f1a90853b1ada468 yuertyqert \n", "115611 126fe960806d587c78546b30f1a90853b1ada468 ZeicheMasZeuchnis \n", "\n", " last_update player_count \n", "0 2015-07-22 05:15:51 2 \n", "1 2015-07-13 09:45:00 4 \n", "2 2015-07-03 04:17:31 4 \n", "3 2015-07-16 21:00:54 5 \n", "4 2015-07-02 05:20:19 4 \n", "... ... ... \n", "115607 2014-09-04 05:04:45 3 \n", "115608 2014-09-02 00:23:43 4 \n", "115609 2014-09-28 18:12:35 2 \n", "115610 2014-09-27 19:01:17 2 \n", "115611 2014-09-10 20:35:32 3 \n", "\n", "[115612 rows x 4 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "games" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
eventfactiongamenumroundturn
0upgrade:SHhalflings0000000001121
1order:2halflings0000000001161
2order:2halflings0000000001121
3order:1halflings0000000001111
4order:1halflings0000000001151
.....................
73419069burndarklingsZeicheMasZeuchnis411
73419070burndarklingsZeicheMasZeuchnis352
73419071favor:FAV10darklingsZeicheMasZeuchnis133
73419072town:TW3darklingsZeicheMasZeuchnis162
73419073action:ACT3darklingsZeicheMasZeuchnis111
\n", "

73419074 rows × 6 columns

\n", "
" ], "text/plain": [ " event faction game num round turn\n", "0 upgrade:SH halflings 0000000001 1 2 1\n", "1 order:2 halflings 0000000001 1 6 1\n", "2 order:2 halflings 0000000001 1 2 1\n", "3 order:1 halflings 0000000001 1 1 1\n", "4 order:1 halflings 0000000001 1 5 1\n", "... ... ... ... ... ... ...\n", "73419069 burn darklings ZeicheMasZeuchnis 4 1 1\n", "73419070 burn darklings ZeicheMasZeuchnis 3 5 2\n", "73419071 favor:FAV10 darklings ZeicheMasZeuchnis 1 3 3\n", "73419072 town:TW3 darklings ZeicheMasZeuchnis 1 6 2\n", "73419073 action:ACT3 darklings ZeicheMasZeuchnis 1 1 1\n", "\n", "[73419074 rows x 6 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gameevents" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['halflings', 'all', 'mermaids', 'dragonlords', 'riverwalkers',\n", " 'yetis', 'darklings', 'chaosmagicians', 'engineers', 'icemaidens',\n", " 'nomads', 'auren', 'fakirs', 'witches', 'cultists', 'alchemists',\n", " 'swarmlings', 'dwarves', 'shapeshifters', 'nofaction1',\n", " 'nofaction4', 'acolytes', 'giants', 'nofaction3', 'nofaction2',\n", " 'nofaction5', 'nofaction7'], dtype=object)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "allfactions = pd.unique(gameevents['faction'])\n", "allfactions" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "gamescoringtiles = pd.read_csv(f'{folderlocation}/game_scoring_tiles.csv')\n", "gameoptions = pd.read_csv(f'{folderlocation}/game_options.csv')\n", "stats = pd.read_csv(f'{folderlocation}/stats.csv')" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gameroundtile
000000000013SCORE1
100000000016SCORE5
200000000015SCORE4
300000000011SCORE8
400000000012SCORE3
............
693655ZeicheMasZeuchnis4SCORE1
693656ZeicheMasZeuchnis6SCORE5
693657ZeicheMasZeuchnis5SCORE7
693658ZeicheMasZeuchnis1SCORE3
693659ZeicheMasZeuchnis2SCORE8
\n", "

693660 rows × 3 columns

\n", "
" ], "text/plain": [ " game round tile\n", "0 0000000001 3 SCORE1\n", "1 0000000001 6 SCORE5\n", "2 0000000001 5 SCORE4\n", "3 0000000001 1 SCORE8\n", "4 0000000001 2 SCORE3\n", "... ... ... ...\n", "693655 ZeicheMasZeuchnis 4 SCORE1\n", "693656 ZeicheMasZeuchnis 6 SCORE5\n", "693657 ZeicheMasZeuchnis 5 SCORE7\n", "693658 ZeicheMasZeuchnis 1 SCORE3\n", "693659 ZeicheMasZeuchnis 2 SCORE8\n", "\n", "[693660 rows x 3 columns]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gamescoringtiles" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "SCORE3 83727\n", "SCORE7 83435\n", "SCORE8 83322\n", "SCORE4 83270\n", "SCORE2 83226\n", "SCORE5 83188\n", "SCORE6 82770\n", "SCORE1 71728\n", "SCORE9 38994\n", "Name: tile, dtype: int64" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gamescoringtiles['tile'].value_counts()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "19\n", "option-errata-cultist-power 0.106882\n", "option-strict-leech 0.106427\n", "option-strict-darkling-sh 0.103665\n", "option-strict-chaosmagician-sh 0.103642\n", "option-email-notify 0.100557\n", "option-mini-expansion-1 0.089982\n", "option-shipping-bonus 0.083785\n", "option-variable-turn-order 0.075660\n", "option-temple-scoring-tile 0.053650\n", "option-fire-and-ice-factions/ice 0.039872\n", "option-fire-and-ice-final-scoring 0.039481\n", "option-fire-and-ice-factions/volcano 0.039336\n", "option-fire-and-ice-factions/variable_v5 0.024844\n", "option-maintain-player-order 0.021085\n", "option-fire-and-ice-factions/variable_v3 0.004580\n", "option-fire-and-ice-factions/variable 0.003220\n", "option-fire-and-ice-factions/variable_v4 0.002017\n", "option-fire-and-ice-factions/variable_v2 0.001303\n", "option-loose-adjust-resource 0.000012\n", "Name: option, dtype: float64\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gameoption
00000000001option-strict-darkling-sh
10000000001option-email-notify
20000000001option-shipping-bonus
30000000001option-strict-chaosmagician-sh
40000000001option-errata-cultist-power
.........
1075569ZeicheMasZeuchnisoption-email-notify
1075570ZeicheMasZeuchnisoption-strict-chaosmagician-sh
1075571ZeicheMasZeuchnisoption-errata-cultist-power
1075572ZeicheMasZeuchnisoption-mini-expansion-1
1075573ZeicheMasZeuchnisoption-strict-leech
\n", "

1075574 rows × 2 columns

\n", "
" ], "text/plain": [ " game option\n", "0 0000000001 option-strict-darkling-sh\n", "1 0000000001 option-email-notify\n", "2 0000000001 option-shipping-bonus\n", "3 0000000001 option-strict-chaosmagician-sh\n", "4 0000000001 option-errata-cultist-power\n", "... ... ...\n", "1075569 ZeicheMasZeuchnis option-email-notify\n", "1075570 ZeicheMasZeuchnis option-strict-chaosmagician-sh\n", "1075571 ZeicheMasZeuchnis option-errata-cultist-power\n", "1075572 ZeicheMasZeuchnis option-mini-expansion-1\n", "1075573 ZeicheMasZeuchnis option-strict-leech\n", "\n", "[1075574 rows x 2 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(len(pd.unique(gameoptions['option'])))\n", "print(gameoptions['option'].value_counts() / len(gameoptions))\n", "gameoptions" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "vp 12839907\n", "build:D 6570095\n", "leech:count 5716489\n", "leech:pw 5716489\n", "upgrade:TP 3543848\n", " ... \n", "action:ACTH5 1852\n", "order:6 912\n", "order:7 636\n", "action:ACTH4 592\n", "action:ACTH3 314\n", "Name: event, Length: 91, dtype: int64" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gameevents['event'].value_counts()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "126fe960806d587c78546b30f1a90853b1ada468 77158\n", "95a66999127893f5925a5f591d54f8bcb9a670e6 21052\n", "be8f6ebf549404d015547152d5f2a1906ae8dd90 16184\n", "fdb13a13cd48b7a3c3525f27e4628ff6905aa5b1 1210\n", "224736500d20520f195970eb0fd4c41df040c08c 3\n", "735b073fd7161268bb2796c1275abda92acd8b1a 2\n", "30b6ded823e53670624981abdb2c5b8568a44091 1\n", "c07f36f9e050992d2daf6d44af2bc51dca719c46 1\n", "b109f78907d2cbd5699ced16572be46043558e41 1\n", "Name: base_map, dtype: int64" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "games['base_map'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "[126fe960806d587c78546b30f1a90853b1ada468](https://terra.snellman.net/map/126fe960806d587c78546b30f1a90853b1ada468)\n", "is the same as [91645cdb135773c2a7a50e5ca9cb18af54c664c4](https://terra.snellman.net/mapedit/91645cdb135773c2a7a50e5ca9cb18af54c664c4) \n", "[95a66999127893f5925a5f591d54f8bcb9a670e6](https://terra.snellman.net/map/95a66999127893f5925a5f591d54f8bcb9a670e6) is different \n", "[be8f6ebf549404d015547152d5f2a1906ae8dd90](https://terra.snellman.net/map/be8f6ebf549404d015547152d5f2a1906ae8dd90) is re-balanced. \n", "\n", "All other maps aren't present here so we'll use those" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Creating the new datasets - some functions to help us make these later " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Victory Points (target) dataset \n", "Let's make some functions to help make the dataset." ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "def makenewdf():\n", " \"\"\"make an empty dataframe, organised in the way we want the target data, ready to be populated\"\"\"\n", " validfactions = ['witches', 'auren', 'swarmlings', 'mermaids', 'cultists', 'halflings', 'dwarves', 'engineers', 'chaosmagicians', 'giants', 'fakirs', 'nomads', 'darklings', 'alchemists']\n", " dfcols = ['game'] + validfactions\n", " vpdf = pd.DataFrame(columns=dfcols)\n", " \n", " return vpdf, dfcols, validfactions\n", "\n", "vpdf, dfcols, validfactions = makenewdf()\n", "\n", "sensecheck = False\n", "if sensecheck:\n", " newdf = pd.DataFrame([[np.nan] * 15], columns=dfcols)\n", " newdf['auren'].replace({np.nan: 'test'}, inplace=True)\n", " vpdf = vpdf.append(newdf, ignore_index=True)\n", " print(len(vpdf))\n", " vpdf.head()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "def get_vp_from_game(singleGameEvents):\n", " \"\"\"Input game events for a single game. This is a pd.DataFrame. \n", " Output a row where each faction in the game has its vp populated (the rest are nans)\n", " \"\"\"\n", " newdf = pd.DataFrame([[np.nan] * 15], columns=dfcols)\n", " \n", " # assign the game number\n", " gameno = list(pd.unique(singleGameEvents['game']))\n", " \n", " # assert len(gameno) == 1, 'More than 1 unique game was found' \n", " try:\n", " newdf['game'].replace({np.nan: gameno[0]}, inplace=True)\n", " except:\n", " print(f'DEBUGGING: len of table is {len(singleGameEvents)}')\n", " print(f'DEBUGGING: gamnos list: {gameno}')\n", " print(singleGameEvents)\n", " raise\n", " \n", " # find factions - there are some artifacts in the data. E.g. the \"faction\", \"all\". We need to filter them out.\n", " rawfactions = list(pd.unique(singleGameEvents['faction']))\n", " verifiedfactions = [rawfaction for rawfaction in rawfactions if rawfaction in validfactions]\n", " \n", " for faction in verifiedfactions:\n", " vpfaction = sum(singleGameEvents[(singleGameEvents['event'] == 'vp') & (singleGameEvents['faction'] == faction)]['num'])\n", " newdf[faction].replace({np.nan: vpfaction}, inplace=True)\n", " \n", " return newdf" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Time elapsed: 3.7775845527648926s\n", "Time to do all games at current speed: 121.31502925395965hrs\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gamewitchesaurenswarmlingsmermaidscultistshalflingsdwarvesengineerschaosmagiciansgiantsfakirsnomadsdarklingsalchemists
00000000001NaNNaNNaN118.0NaN62.0NaNNaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " game witches auren swarmlings mermaids cultists halflings \\\n", "0 0000000001 NaN NaN NaN 118.0 NaN 62.0 \n", "\n", " dwarves engineers chaosmagicians giants fakirs nomads darklings \\\n", "0 NaN NaN NaN NaN NaN NaN NaN \n", "\n", " alchemists \n", "0 NaN " ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# sense check\n", "start = time.time()\n", "singlegame = gameevents[gameevents['game'] == '0000000001']\n", "singlegamefactions = (list(pd.unique(singlegame['faction'])))\n", "vpforgame = get_vp_from_game(singlegame)\n", "end = time.time()\n", "\n", "print(f'Time elapsed: {end-start}s')\n", "print(f'Time to do all games at current speed: {((end-start)*len(pd.unique(gameevents[\"game\"]))/3600)}hrs')\n", "vpforgame" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Features dataset \n", "We'll want:\n", "1. Boolean of bonus tiles (in game or not) - DONE\n", "2. One-hot of round tiles, for each round - DONE\n", "3. Boolean of factions already picked (more info if any have been picked yet) - removed as would require 1 row for each faction. We want 1 line per game for now\n", "4. One-hot for different map variants we'd like to use - DONE\n", "5. One-hot player count (from 2, 3, 4 or 5 players) - DONE" ] }, { "attachments": { "942cd242-5b8b-432f-8843-51185679848e.png": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAIoAAAEKCAYAAADNQM0IAAAXN0lEQVR4Ae2d/ZHjKgzAU2NmtpUrYCc1vA6ypewVcn9sGX6DhNAHOMGbQIDoZt4ztkEI6WdBguI9bf7PLVBhgVNFHa/iFtgcFIegygIOSpWZvJKD4gxUWcBBqTKTV3JQnIEqCzgoVWbySg6KM1BlAQelykxeyUFxBqos4KBUmckrvTUo35+X7fsOAz9f5+30cd1+7tQb6vbfy3Y6xbH9u27n03m7/ntMw4ag/GzXj9N2Oon/Pu+55bHB5K2/t8vptF3+ijtguNN2+vzeABQw6mk7f5VRGB+UwhhnBIUdEMHpCkvBiMRMBCQAc+vflKDIAc0SURiUbduCc2IY//4MT/U1RR2qB46hKKRCPjo9Ragb9yiCQB8k6xSjxq8iymW7iOhI8tEfWi95z47xJKeAzIH4IHF7LZfsJhkI5eIY70SUXRuTbchm4iFqPvUQAGFQ8unEAeq5EwfA64ZQh9rLMhqI6qFBqd4Gg9X32PjazNVrFOlg6YQNnav75qnOjlGOH/WU49eg7I9Xj2HbcPxqjFJHA+S+jXX/WxjbJ6/NOoKiHQpGFMSCYh8MBpgjRSAcRHGakEaBRnLABSNaO985V86FukKmcUK4Lccly9A0jWfbboNyY7yZvkIfuidtonREuQnsUD/ppP1DoujYHJQ0VVDojz1nRoxPp6wP5TTF4EDofhosGEUsmGPYxCesYEQaeeUxB0WAqKIXCoT68QHIxpiccg+UIGtnvJnehTHeAYVsmI5kYxgP21JGqeagJIeaAWZGjKBI5UwTPpVPiTQ+14ilghGzOrcv5KAImVIP6hHWXrhAzsYodc3aCgCtSlldWUHoQ5fvgFJlYylj21pmuBXCHA3EhGi6DE6hz/9wkeZJOsaaynBoKAXk30v8SHxbB+r31tGCos9RfpoSQS+zRpHTqwTFri3AMdT21nittoUxSicrW8V1YtHG39vF6irqDRRR0AAIC4c/AgCeTlqN2+9GTJhOXzYFkdEBIcySLGvqW+dWHyUbGiKoFMbl03ozohjd6BMgtb89XqOxHeMNUEJLOya0S4Q+2VgutJtGFDMYP53aAg0jytR2ceWNBRwUYxA/LVvAQSnbxa8aCzgoxiB+WraAg1K2i181FnBQjEH8tGwBB6VsF79qLOCgGIP4adkCDkrZLn7VWMBBMQbx07IFuoHy8/Oz/ffff9ufP3/u/hfqhfr+bxwLdAMlOP/7+3Z+Kpkl1Av1/d84FugGSogkR/4drX9Ettc9bgEH5bjN3rKFgzKS22UeyUh6tc1w0yM9OpXU1bfJNiHhSSfcoBY6uSivQ/d1W0jwkVlfekjPP3NQNvikc8SyR0CRmWvgXEoWhg7LqZIhI42yyTCR+bxdPs/wC0LS00EhS3TMcKtzPCtWV/9Oviil/SlwsA8NVIApRBM6ijoiouj0RIo+COLlK/zGN6Zwqv7wPqVK2h9ygR7UDo70m6Sgg46Y8oFgS/UpTb5GyUHRUSC/n8wKScfkFAZEtpdl/v1LlJASuCMICQ7TZ6oX2mHd5HCYagg4yu8lnSIkCVTWMY2hY2EJUNLTSk9mMi4am6cYYVmVnS6dwGUJCpRFVjpLQufLPqBuAodrhpJMuJZlqCXXKEo/lBHqJ8i02OZnS4CijSefaFk2ttyJKKEWAUJHaqmmngRCDoqNPqpdgDmCnDk+A4V/jUAPgx4radb+2A2UNt/MlkGQDt57uvV1jiJocjy3i1t2h+w3B0XKBkgSVEcjCk1D3POrSt1AabPXIx1GJrTX0JHqSYQnN//UI182A84WT//P10W8jEb2YUHR5wCKmQopouBvjhgGjDx0jn2kumF4/67bZec9LjT6VsduoLQZQDQmrU3iUUEBHaPzKHyXv0cRi0rZhpwc4Uoy6Hr24zOzjoApjqaQ83YOr89IbfVrKy6f4k1JoIMdn9WxjVVLUicHpTSk3td0BOnde6/+HJSHLe2gPGxCF7COBTyirOPLpiNxUJqadx3hDso6vmw6EgelqXnXEe6grOPLpiPpBkqbb2ab2saFCwt0A6XNXo8YiRebWqAbKHWJSDzWo/W5pZdaWMBBaWHVBWU6KI2dqnePG3fWUPzkoNjd1fGy8BUohay1hr59quglQJFpBTJpCC2Fm3ayDr17ltMXQ502WfgOykFejy5O6+pjRMkhoOQf/Rc9pMoaKATlXhY+JUdTTgr2m+8eBzhIJwYlj34MqtRszPKaESUlBhVAIj/s5MwCQLG9LBMkBAD8lOIr/Oi+FpTSH0ogZcY/LgEKPeHpaEApPrlqvUARJTiMywoUmfis/OqgKHM8elI3lXAvdfVLEUNek2WWDaWdiBLuESB0hPoBFJEkzdIcFLbFE0ptvpktgyAdDOWCg/V1jiI4VDxXWfgeUZ5AQYWINns9JVDsNXzieW1Bv8g7loVPaxGWQ3+2xPQHkaq0mA1GyqNPhemGqNJtjdJmtOiktDZpmYUfBhAhoP4SNPL6x3W7Fj/1oAUgkkU9i2unNoZ6WOrkoDw8fhdQaQEHpdJQ717NQXl3AirH76BUGurdqzko705A5fgdlEpDvXs1B+XdCagcv4NSaah3r9YNlDbfzL67+/qNvxsobfZ6+hnq3XvqBkrdbjC742h9bumlFhZwUFpYdUGZDsqCTm0xpMlBKe0el95zhtv7tOtbfodbnsEv81qS8dW73Dg3N91ftLAEKGm7n7LTVKJSfT6KSlQiWSmtkvJY3gcOyfxyoNhXcupMNh66vh5gCpGIjlhPRxR9jyVxCeqnN1SuBdRyoGjnmuwz9mlMQiJnMgSyvSzbpCWYxkS0QUhIXog+V/FeWtnxnOUlQOG1R3yfa3IgglLMJIOsNFrPMCg3s/B3p7QbQM7JRab1EqDINQr96RK8dsOBAApFAAlKfRY+RBGA5waQmcnnvNANlDbfzJZBkFMGO1M7SF/XoFBUUYtbBRbK4n7Keuge5z7rBkqbvZ6Sg+y1AAFnxYO74kdcnpIsKDGqhIWpmcb4HOWSDIBG/JkW/e78uSEJ2ncDpY2pEAq7RtFTUegZncr1aG1CWuWgpDYJlFyO7Qd+Z+yfesiofnxHC0weUd7RZa8Zs4PyGrtP16uDMp3LXqOwg/Iau0/Xq4Myncteo7CD8hq7T9ergzKdy16jcDdQ2nwz+xqjvWOv3UBps9fzji57zZi7gXI0q/5o/deY7316dVDex9cPjdRBech8obHeRbbi+IXE9s5c55ODUto9tjvD7Mzbu8eVWfiZfx2UzCSPXDi65qirb3NPSq80R0eqlICdfBSVqFTKwi8a4DYoxSYTXlwiouQQUIpjCRz0UjnDLTidIxJnsGEbnW9C9SIoX9ftTLkoIrdWTT2Up/t12VJ0U/ku4xK0HCjauXnESa5QqY0MiGwvy/AzEAFAOMfsNgSF32qt+8xBkVlzEbK/SathC0uAkp5OeqLTU4pOo3RF5QV6uv+FqwyKLEtQoCxSHVlW7uwAB0W5HBSKRChB3Wehw5WWAIWcgtaVT7QsG9vvRJRQiwChI7UEpxKMKbo8Bortg/oa7dgNlDbfzJZBkMaHcnIqm19flxEl1MFzu7gVrbfrB0WNx0DxiMJWhVKbvZ4SKPYaOlJFnZ1PPVeYhlBxAClEjziN6ax62ccDoKjpzxhssNNuEaXNuNFhdo2ioICO0ZlcT68TKIJIUPAag0J/Xi7JSOugo6DEXzPGKay4fmpjrIekTg7KQ2Pv33iiCGKN46BYi7Q8d1BaWtdlj2ABjygjeGECHRyUCZw0gooOyghemEAHB2UCJ42gooMyghcm0KEbKG2+mZ3Awouo2A2UNns9i3hhgmF0A6UuY40tdrQ+t/RSCws4KC2suqBMB2VBp7YY0uSglHaP7c5wMFvN7vH9LHyVuHQ6xVTIFm4J+pbG0aKvOplLgCLTCnRCEkMi61DKAG/xo2NsohLIkukEMgGqyQafBN9BqUK4bjErE4iiWEhKapOFrxXXeSiYqXaFzDfMWWFHa+BQ5wQpAMf6Yh+liGKuhXEKcEP/6mHQyj58tmZESVGgABKZTDmInSCdKsvULB0BSIYBpyU+h7bkSOlU6NckRFG9JJz1SZc2DZjuT9/jNs8rLQFKyjqjxGcDSnp6pd3U1CEdw+UiKORos0bBiPLNPezIDzLPX9ftEjP68fyH20GJdZA3WJ9w/7JdQVZoi+eid9nsKeUlQNEhV0YRWTb2AkdSyNeOIYfQ0bSMp6ENL2gzUNRPQPiJ//7EqBPqB4DpqPvQ+qR7FJmC7uFhCMcQjeiYKj6/0A2UNt/MlkGQDoZyFtrtLwitY/DcLm6t+YOTCdIMFABRT0UQSUiX4PTPS4osWrbVh+6G6xhJMEqG8Z+3oCfpQTWffewGSpu9nhIo9lowLjsUDAjrC44GpeRqAExk4cPTm6a0Lf69H5ZhQbHn9EmLHYp6yQUpO3cPFBzbSfwQjfQsTq8s8OFSN1Ae1rQogAynM9vZGdQoOoXWMNl3FCXHxDYJjrwv6RwAI8k/qU8kqEXeR2ijdc37sCDJaAlyIXLRFErjff5xclCeb5DfSswiyG8FDdrOQXmSYxyUJxnSxcxtAY8oc/uvm/YOSjdTz92RgzK3/7pp76B0M/XcHTkoc/uvm/bdQGnzzWw3O719R91AabPX8/b+62aAbqDUJSLxuI/W55ZeamEBB6WFVReU6aD82qm4aSg3Bn8taoKGk4NS2G3NdoaDF+JOcNrd5TwR9BHd19eznVrlUAdFmeNZJ0fXHHX1ERS5VQ/OpeQgUB4dKutQbghHg1AHE4DoLZChqYPC3l8iouQQcH5GDg4OXl9HUK7/6CjqpHwUTlaSOboAm8lmg9aQHIV64M5yOUOfXTF2aTlQdBTII05yh0r4YUBke1mm6YuhDG10hhvfw1xYOgdQxJSoIU0aDV1YAhT5hEM5RQEEhacY4QsVBRgUmRapQBERAqVoUGA6S1OelIfQyCkN0ioFOEKrYYtLgEJPLlpZRhFZNj7YiSihFgFCR2gZQEkghCsGFJl1b+rmSU0aJKPZkKfdQGnzzWwZBOlgKCsHox/0des4PFdZ+PciSkAn5sCGo4xiGSgqmg3JRaZUN1Da7PWUQLHX8MlXUQecLp1pQYlRRWbh2wiSyYiL3Y/zdhZZ8sHiFhR7nnllwAvdQGkzdoTCrlEUFNAxwsL19Pclcl3CesY2ab2zbfSxGtdB4Tc5ErbQMuoj2xAo6TucUoY+9zpqaXJQRjWr1mvGCKJHsG0OirVIg3MHpYFRXeSYFvCIMqZfhtPKQRnOJWMq5KCM6ZfhtHJQhnPJmAo5KGP6ZTituoHS5pvZ4ey5rELdQGmz17OsX4YbWDdQ6jLW2D5H63NLL7WwgIPSwqoLynRQFnRqiyFNDkpp99juDAez1ewe33kX/oQ5JM8EZglQZFqBTkhiSGQdShfg5CLMR1GJSiLTDQzuoDyTu31ZRxendfVtkhLljDw7Cz+PXAEygvL6yW+lJPjyHWOpK0a4y9d1O1OeSiELb9+a/e+sGVFS4pB0jjHuTs4sOD+2l+VSQjTcl8lLMl1SyaefehDAcSpMcNzQ06j9qtMlQOHMtfhkG1DoKVdGVlMJTj3Xf6EGl6tASc6mtpT1hs6nvpUsm1ZJ05ySpbR9+ckSoKj1R0xHxGs3nlT1xDMcwSPkVDqClxRY6De4r5y7B4e+TotrggikhWikZGEfo/y/GyhtvpktgyAdnDuz5GQNCkUVtbitAiXIoYgippvQVkFg6hGcqs4oiKAe3UBps9dTAsVeQ6eoqJNl0FtQKrLwC84tQRkWteeQmf8l/9SKBcWejwVJ0KYbKG2GjlDYNYp2SugZHcH17HctOSipTVrvCHhi1AAw6FMLHGmxKkYLUJb6409KQa9cZyFjgOLkoLzWgqUIUqfR+BHEjsNBsRY5cO6gHDCWV30PC3hEeQ8/PzxKB+VhE76HAAflPfz88CgdlIdN+B4CHJT38PPDo+wGSptvZh8evwuotEA3UNrs9VSO0qs9bIFuoNQlIvF4jtbnll5qYQEHpYVVF5TpoCzo1BZDmhyU0u6x3akNZqvZPb6Thd/C+hPJXAIUuUWfb9QhJLLOr7LwJ3JqC1WXAwUh4LyQHBw0o75O+Sh0FHVEPgpk1qf8E4xcWg624wx8hHSmbPs9yJYDBRyXnGuz3YQZdnJmZXtZBgBlquLfS3zpsIaL0ig5UVu+LvSGPkK1EYtLgMKZa+2y8AEa86JhciikO1Kqo0qSjhHlL9Xk3wLJxEi+O25pCVDU+qNVFn5YEosfeqmMeQGHgqbws4wsMo3LhtKsGyhtvpkth3I5ZUBZThlx+Pp6efpQWfjKbLZfah+OvD6iT1vyZxm6XyV06JNuoLTZ67EOC7a21zD8q6jziyz8n6/LhuuOUh84pYRse/XnVrKIgrpIcIamQyjXDRTR5xOLCIVdoygooDd0ENez37WE+6VrJ3Z8hCvJSAvmOBxYHIvf9BT7HT/bfs85k4OyN6xRrs8bQawFHRRrkaeeOyhPNacLG98CHlHG99EQGjooQ7hhfCUclPF9NISGDsoQbhhfCQdlfB8NoWE3UNp8MzuEDd9CiW6gtNnreQsfDTHIbqAczao/Wn8Iay6shIOysHOfOTQH5ZnWXFjW5KCUdo/tLnDwXs3ucX0Wvk5OWpgOMbQlQJFpBXliEEIi6/w2Cx9kx+RqJU8YdNXicqC0zMInCEoRBdIkP6/b9YPe9liKbCRhvuNyoMBTn5KKbLabcNDRLHzRdBcUkfyURzYhYMLiEqCkrDP6zY0BpZh6CKDQUx+mp7ysoWPv7oKS+qW3VpNMbjtraQlQ9HpBRhFZNi7aiSihFgFCR9MSsvF1nzFDX4ICC2gHxdru7nmbb2bLIEgHQ/nJWfgeUe66+/cV2uz1lECx1+o/9XCWvXiduYoSOP4aUEIdnZH/e9uN0LLb1NNmsAiFXaPYaaHuexQ7TcTvXgQoEJ1oHQRHbgNgyHuFKNbGBn2kTg5KHyPV9LJaBLFjdlCsRX557qD80nDebC0LeERZy5/NRuOgNDPtWoIdlLX82Ww0Dkoz064l2EFZy5/NRtMNlDbfzDaziws2FugGSpu9HjMaP21mgW6gHM2qP1q/mYVcMFjAQXEQqizgoFSZ6VYl3DwsJkfRmyTFxuItSSPfmxyU0u4x7+iy4eNOcNrdtXXovr4u81pYli05KNYiD50fXXPU1be5J6UX/qIjVerBzlsh7etCnwHKQ0YbqPESESWHgN/1eizDLUDFUcWConNOqF6MKF/X7UwRS+SiqF1lytP9umwph4amJbr3T9ABQONYUM7rsvyXA0U7N484yQ07ObOyvSxnb5xW78Lff999DorMfNPTVqgroZfnIMdCLIBM42pUWAKU9HTSE01PaXw5cXGhqZ5gGUm4LEGBsnorNXlEOztczRxM+qg+sT0AQPdDBEnOZz1IpkqtLMgijVoclwBFPoX6zdXHI0owMgFCRzI8PtXxB17KofpFxEdA0X0IOBQ0r8/y7wZKm29myyBI40M5OZVcbhe9wkFQBc/t4la0hl8EIqCh7u9BURFFRKNwXUZCW29bNaK02espgWKvoSNV1Nn51HMrC3//XfgPgFJydrj2cd7OZpqzoNhzhrhNqVtEaaM+QmHXKAoK6BidyfXoEwtpFe6XromFZ4QryaB1RfaHEe6tUei3yXiUUQO1iWNK8vEqgEFrsHAsREkaTYvj5KC0MElDmaUIUtld7whi1XJQrEVanjsoLa3rskewgEeUEbwwgQ4OygROGkFFB2UEL0ygg4MygZNGUNFBGcELE+jgoEzgpBFUdFBG8MIEOjgoEzhpBBUdlBG8MIEODsoEThpBRQdlBC9MoIODMoGTRlDRQRnBCxPo4KBM4KQRVHRQRvDCBDo4KBM4aQQVHZQRvDCBDg7KBE4aQcX/AQ2Rq5P5RHWPAAAAAElFTkSuQmCC" } }, "cell_type": "markdown", "metadata": {}, "source": [ "These are the bonus tiles available: \n", "![image.png](attachment:942cd242-5b8b-432f-8843-51185679848e.png)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['pass:BON10', 'build:D', 'pass:BON3'], dtype=object)" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.unique(gameevents[(gameevents['round'] == 0) & (gameevents['game'] == 'ytuwertqwtr')]['event'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "But it looks like we only have details on the bonus tiles selected in the middle of the game. Where can we get information about all bonus tiles in game!? I'm not sure, so I'll use the only info that's available - which is the bonus tiles that players select. However, if there's a bonus tile that was never selected during a game, it won't be shown. If we go onto the Snellman website and re-watch an old game, we can see what bonus tiles are available in the GUI, so this info must be stored somewhere (as game history GUI must be using the same data we're looking at...?)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Time elapsed: 3.7590675354003906s\n", "['BON8', 'BON6', 'BON3', 'BON10', 'BON9']\n" ] } ], "source": [ "# working out the code to get the bonus tiles & timing this\n", "start = time.time()\n", "allevents = list(pd.unique(gameevents[gameevents['game'] == 'ytuwertqwtr']['event']))\n", "bonustiles = [event[5:] for event in allevents if event.startswith('pass:BON')]\n", "end = time.time()\n", "print(f'Time elapsed: {end-start}s')\n", "print(bonustiles)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['game', 'r1_SCORE1', 'r1_SCORE2', 'r1_SCORE3', 'r1_SCORE4', 'r1_SCORE5', 'r1_SCORE6', 'r1_SCORE7', 'r1_SCORE8', 'r1_SCORE9', 'r2_SCORE1', 'r2_SCORE2', 'r2_SCORE3', 'r2_SCORE4', 'r2_SCORE5', 'r2_SCORE6', 'r2_SCORE7', 'r2_SCORE8', 'r2_SCORE9', 'r3_SCORE1', 'r3_SCORE2', 'r3_SCORE3', 'r3_SCORE4', 'r3_SCORE5', 'r3_SCORE6', 'r3_SCORE7', 'r3_SCORE8', 'r3_SCORE9', 'r4_SCORE1', 'r4_SCORE2', 'r4_SCORE3', 'r4_SCORE4', 'r4_SCORE5', 'r4_SCORE6', 'r4_SCORE7', 'r4_SCORE8', 'r4_SCORE9', 'r5_SCORE1', 'r5_SCORE2', 'r5_SCORE3', 'r5_SCORE4', 'r5_SCORE5', 'r5_SCORE6', 'r5_SCORE7', 'r5_SCORE8', 'r5_SCORE9', 'r6_SCORE1', 'r6_SCORE2', 'r6_SCORE3', 'r6_SCORE4', 'r6_SCORE5', 'r6_SCORE6', 'r6_SCORE7', 'r6_SCORE8', 'r6_SCORE9', 'BON1', 'BON2', 'BON3', 'BON4', 'BON5', 'BON6', 'BON7', 'BON8', 'BON9', 'BON10', '2players', '3players', '4players', '5players', 'map1', 'map2', 'map3']\n", "72\n" ] } ], "source": [ "def emptyfeaturesdf():\n", " \"\"\"make an empty dataframe, organised in the way we want the feature data, ready to be populated\"\"\"\n", " colnames = ['game']\n", " uniqueScoreTiles = np.sort(pd.unique(gamescoringtiles['tile']))\n", " \n", " # One-hot of round tiles, for each round\n", " for gameround in range(1, 7):\n", " roundstr = f'r{gameround}'\n", " for tile in uniqueScoreTiles:\n", " colnames.append(roundstr + '_' + tile)\n", " \n", " # Boolean of bonus tiles\n", " for bon in range(1, 11):\n", " colnames.append(f'BON{bon}')\n", " \n", " # One-hot player count (from 2, 3, 4 or 5 players)\n", " for player in range(2, 6):\n", " colnames.append(f'{player}players')\n", " \n", " # one hot of the map used\n", " \"\"\"126fe960806d587c78546b30f1a90853b1ada468 - map1\n", " 95a66999127893f5925a5f591d54f8bcb9a670e6 - map2\n", " be8f6ebf549404d015547152d5f2a1906ae8dd90 - map3\n", " \"\"\"\n", " colnames = colnames + ['map1', 'map2', 'map3']\n", " \n", " featuresdf = pd.DataFrame(columns=colnames)\n", " \n", " return featuresdf, colnames\n", "\n", "featuresdf, featcolnames = emptyfeaturesdf()\n", "print(featcolnames)\n", "print(len(featcolnames))\n" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "def get_features_from_game(singlegameevents, singlegamemeta, singlegameST, singleendplayers=None):\n", " \"\"\"\n", " Inputs:\n", " singlegameevents - is game events for a single game \n", " singlegamemeta - is a single row from `games` that gives map & player count\n", " singlegameST - is a single row from `gamescoringtiles` that gives... score tile (suprisingly)\n", " singleendplayers - is a single row from `end players` that gives the amount of players at end of game, after dropouts\n", " Return: - a row where features have been found (will be sparse)\n", " \"\"\"\n", " newdf = pd.DataFrame([[0] * len(featcolnames)], columns=featcolnames)\n", " \n", " # assign game string\n", " singlegamemeta.iloc[0]['game']\n", " newdf['game'].replace({0: singlegamemeta.iloc[0]['game']}, inplace=True)\n", " \n", " # find the round tiles for each round\n", " for gameround in range(1, 7):\n", " roundstr = f'r{gameround}'\n", " scoretile = roundstr + '_' + singlegameST[singlegameST['round'] == gameround]['tile'].values[0]\n", " newdf[scoretile].replace({0: 1}, inplace=True)\n", " \n", " # Boolean of bonus tiles\n", " uniqueevents = list(pd.unique(singlegameevents['event']))\n", " bonustiles = [event[5:] for event in uniqueevents if event.startswith('pass:BON')]\n", " for bontile in bonustiles:\n", " newdf[bontile].replace({0: 1}, inplace=True)\n", " \n", " # One-hot player count (from 2, 3, 4 or 5 players)\n", " if singleendplayers is None:\n", " noplayers = singlegamemeta.iloc[0]['player_count']\n", " print('gamemeta used for player count')\n", " else:\n", " noplayers = singleendplayers.iloc[0]['endplayers']\n", " \n", " players = f'{noplayers}players'\n", " newdf[players].replace({0: 1}, inplace=True)\n", " \n", " # one hot of the map used\n", " mapdict = {'126fe960806d587c78546b30f1a90853b1ada468': 'map1',\n", " '95a66999127893f5925a5f591d54f8bcb9a670e6': 'map2',\n", " 'be8f6ebf549404d015547152d5f2a1906ae8dd90': 'map3'\n", " }\n", " basemap = singlegamemeta.iloc[0]['base_map']\n", " gamemap = mapdict[basemap]\n", " newdf[gamemap].replace({0: 1}, inplace=True)\n", " \n", " return newdf" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "gamemeta used for player count\n", "Time elapsed: 4.1567041873931885s\n", "Time to do all games at current speed: 133.49024569802813hrs\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gamer1_SCORE1r1_SCORE2r1_SCORE3r1_SCORE4r1_SCORE5r1_SCORE6r1_SCORE7r1_SCORE8r1_SCORE9...BON8BON9BON102players3players4players5playersmap1map2map3
00000000001000000010...0111000100
\n", "

1 rows × 72 columns

\n", "
" ], "text/plain": [ " game r1_SCORE1 r1_SCORE2 r1_SCORE3 r1_SCORE4 r1_SCORE5 \\\n", "0 0000000001 0 0 0 0 0 \n", "\n", " r1_SCORE6 r1_SCORE7 r1_SCORE8 r1_SCORE9 ... BON8 BON9 BON10 \\\n", "0 0 0 1 0 ... 0 1 1 \n", "\n", " 2players 3players 4players 5players map1 map2 map3 \n", "0 1 0 0 0 1 0 0 \n", "\n", "[1 rows x 72 columns]" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# sense check\n", "start = time.time()\n", "singlegame = gameevents[gameevents['game'] == '0000000001']\n", "singlegamemeta = games[games['game'] == '0000000001']\n", "singlegameST = gamescoringtiles[gamescoringtiles['game'] == '0000000001']\n", "vpforgame = get_vp_from_game(singlegame)\n", "featsforgame = get_features_from_game(singlegame, singlegamemeta, singlegameST)\n", "end = time.time()\n", "\n", "print(f'Time elapsed: {end-start}s')\n", "print(f'Time to do all games at current speed: {((end-start)*len(pd.unique(gameevents[\"game\"]))/3600)}hrs')\n", "featsforgame" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Speeding up dataset creation \n", "350 hours: that's way too long. How long will it take to split the table into 1500, 100 game tables and work on those?" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Time elapsed to make small table: 1.3441839218139648s\n", "gamemeta used for player count\n", "Time elapsed to get data for single game now: 0.02479243278503418s\n", "Time elapsed to do all small tables: 0.4316771988021003hrs\n", "Time elapsed to do all data creation: 0.7961952053176032hrs\n", "Time elapsed to do all processing: 1.2278724041197036hrs\n" ] } ], "source": [ "first100games = gameslist[:100]\n", "\n", "start = time.time()\n", "gameevents100 = gameevents[gameevents['game'].isin(first100games)]\n", "gamemeta100 = games[games['game'].isin(first100games)]\n", "gameST100 = gamescoringtiles[gamescoringtiles['game'].isin(first100games)]\n", "end = time.time()\n", "smalltablestime = end-start\n", "\n", "print(f'Time elapsed to make small table: {smalltablestime}s')\n", "\n", "start = time.time()\n", "singlegame = gameevents100[gameevents100['game'] == '0000000001']\n", "singlegamemeta = gamemeta100[gamemeta100['game'] == '0000000001']\n", "singlegameST = gameST100[gameST100['game'] == '0000000001']\n", "vpforgame = get_vp_from_game(singlegame)\n", "featsforgame = get_features_from_game(singlegame, singlegamemeta, singlegameST)\n", "end = time.time()\n", "datacreatetime = end-start\n", "\n", "print(f'Time elapsed to get data for single game now: {end-start}s')\n", "totalsmalltablestime = smalltablestime * len(pd.unique(gameevents[\"game\"]))/(100 * 3600) # divide by 100 as that's the size of table, divide by 3600 for s -> hrs\n", "totaldatacreatetime = datacreatetime * len(pd.unique(gameevents[\"game\"]))/3600\n", "print(f'Time elapsed to do all small tables: {totalsmalltablestime}hrs')\n", "print(f'Time elapsed to do all data creation: {totaldatacreatetime}hrs')\n", "print(f'Time elapsed to do all processing: {totalsmalltablestime + totaldatacreatetime}hrs')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Damn. I've just saved ~200 times the amount of time by doing that. As a complete aside, for pure interest, I wonder what the best trade-off between spitting into smaller & smaller tables (which increases time to do this) is vs saving time to then filter with smaller tables, is? (will check this out later)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Progressed to 100th game\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "Progressed to 200th game\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "gamemeta used for player count\n", "no of unique games in table is: 200\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gamewitchesaurenswarmlingsmermaidscultistshalflingsdwarvesengineerschaosmagiciansgiantsfakirsnomadsdarklingsalchemists
00000000001NaNNaNNaN118.0NaN62.0NaNNaNNaNNaNNaNNaNNaNNaN
1050615NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN122.0NaN
20512NaNNaNNaNNaNNaNNaNNaN124.0101.0NaNNaNNaNNaNNaN
3051501NaN84.0NaNNaNNaN77.0NaNNaN72.0NaNNaN54.078.0NaN
4060303NaNNaNNaN108.0NaNNaNNaN121.0NaNNaN78.0NaN128.0NaN
................................................
1954pLeague_S7_D6L17_G7NaNNaN48.0NaNNaNNaNNaNNaN73.0NaNNaN130.087.0NaN
1964pLeague_S7_D6L18_G2NaN105.0NaNNaNNaNNaNNaNNaN108.0NaNNaN128.053.0NaN
1974pLeague_S7_D6L18_G5NaNNaN97.0NaNNaNNaN82.0NaNNaNNaNNaN110.0117.0NaN
1984pLeague_S7_D6L18_G6NaNNaN78.0NaNNaN103.0NaNNaN137.0NaNNaNNaN61.0NaN
1994pLeague_S7_D6L18_G795.0NaNNaNNaNNaN122.0NaNNaN90.0NaNNaNNaN120.0NaN
\n", "

200 rows × 15 columns

\n", "
" ], "text/plain": [ " game witches auren swarmlings mermaids cultists \\\n", "0 0000000001 NaN NaN NaN 118.0 NaN \n", "1 050615 NaN NaN NaN NaN NaN \n", "2 0512 NaN NaN NaN NaN NaN \n", "3 051501 NaN 84.0 NaN NaN NaN \n", "4 060303 NaN NaN NaN 108.0 NaN \n", ".. ... ... ... ... ... ... \n", "195 4pLeague_S7_D6L17_G7 NaN NaN 48.0 NaN NaN \n", "196 4pLeague_S7_D6L18_G2 NaN 105.0 NaN NaN NaN \n", "197 4pLeague_S7_D6L18_G5 NaN NaN 97.0 NaN NaN \n", "198 4pLeague_S7_D6L18_G6 NaN NaN 78.0 NaN NaN \n", "199 4pLeague_S7_D6L18_G7 95.0 NaN NaN NaN NaN \n", "\n", " halflings dwarves engineers chaosmagicians giants fakirs nomads \\\n", "0 62.0 NaN NaN NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN NaN NaN NaN \n", "2 NaN NaN 124.0 101.0 NaN NaN NaN \n", "3 77.0 NaN NaN 72.0 NaN NaN 54.0 \n", "4 NaN NaN 121.0 NaN NaN 78.0 NaN \n", ".. ... ... ... ... ... ... ... \n", "195 NaN NaN NaN 73.0 NaN NaN 130.0 \n", "196 NaN NaN NaN 108.0 NaN NaN 128.0 \n", "197 NaN 82.0 NaN NaN NaN NaN 110.0 \n", "198 103.0 NaN NaN 137.0 NaN NaN NaN \n", "199 122.0 NaN NaN 90.0 NaN NaN NaN \n", "\n", " darklings alchemists \n", "0 NaN NaN \n", "1 122.0 NaN \n", "2 NaN NaN \n", "3 78.0 NaN \n", "4 128.0 NaN \n", ".. ... ... \n", "195 87.0 NaN \n", "196 53.0 NaN \n", "197 117.0 NaN \n", "198 61.0 NaN \n", "199 120.0 NaN \n", "\n", "[200 rows x 15 columns]" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# sense check for 100 games\n", "vpdf, _, _ = makenewdf()\n", "featdf, _ = emptyfeaturesdf()\n", "gamelengthlen = len(gameslist)\n", "gamesroundup = math.ceil(gamelengthlen / 100.0) * 100\n", "jj = 0\n", "\n", "for ii in range(100, 201, 100): \n", " ii = min(ii, gamelengthlen) \n", " print(f'Progressed to {ii}th game')\n", " \n", " next100games = gameslist[jj:ii]\n", " jj = ii # so that we don't get any repetitions at the very end, where our set will be smaller\n", " \n", " gameevents100 = gameevents[gameevents['game'].isin(next100games)]\n", " gamemeta100 = games[games['game'].isin(next100games)]\n", " gameST100 = gamescoringtiles[gamescoringtiles['game'].isin(next100games)]\n", " \n", " for game in next100games:\n", " singlegame = gameevents100[gameevents100['game'] == game]\n", " singlegamemeta = gamemeta100[gamemeta100['game'] == game]\n", " singlegameST = gameST100[gameST100['game'] == game]\n", " \n", " if not len(singlegame) == 0: \n", " vpforgame = get_vp_from_game(singlegame)\n", " featsforgame = get_features_from_game(singlegame, singlegamemeta, singlegameST)\n", " \n", " vpdf = vpdf.append(vpforgame, ignore_index=True)\n", " featdf = featdf.append(featsforgame, ignore_index=True)\n", " \n", "print(f\"no of unique games in table is: {len(list(pd.unique(vpdf['game'])))}\")\n", "vpdf" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gamer1_SCORE1r1_SCORE2r1_SCORE3r1_SCORE4r1_SCORE5r1_SCORE6r1_SCORE7r1_SCORE8r1_SCORE9...BON8BON9BON102players3players4players5playersmap1map2map3
00000000001000000010...0111000100
1050615000010000...1100010001
20512000010000...1100010100
3051501000000010...1000001100
4060303010000000...1110010010
..................................................................
1954pLeague_S7_D6L17_G7000000100...1110010100
1964pLeague_S7_D6L18_G2000000100...1010010100
1974pLeague_S7_D6L18_G5000000010...1100010100
1984pLeague_S7_D6L18_G6000000010...1010010100
1994pLeague_S7_D6L18_G7000000010...1110010100
\n", "

200 rows × 72 columns

\n", "
" ], "text/plain": [ " game r1_SCORE1 r1_SCORE2 r1_SCORE3 r1_SCORE4 r1_SCORE5 \\\n", "0 0000000001 0 0 0 0 0 \n", "1 050615 0 0 0 0 1 \n", "2 0512 0 0 0 0 1 \n", "3 051501 0 0 0 0 0 \n", "4 060303 0 1 0 0 0 \n", ".. ... ... ... ... ... ... \n", "195 4pLeague_S7_D6L17_G7 0 0 0 0 0 \n", "196 4pLeague_S7_D6L18_G2 0 0 0 0 0 \n", "197 4pLeague_S7_D6L18_G5 0 0 0 0 0 \n", "198 4pLeague_S7_D6L18_G6 0 0 0 0 0 \n", "199 4pLeague_S7_D6L18_G7 0 0 0 0 0 \n", "\n", " r1_SCORE6 r1_SCORE7 r1_SCORE8 r1_SCORE9 ... BON8 BON9 BON10 2players \\\n", "0 0 0 1 0 ... 0 1 1 1 \n", "1 0 0 0 0 ... 1 1 0 0 \n", "2 0 0 0 0 ... 1 1 0 0 \n", "3 0 0 1 0 ... 1 0 0 0 \n", "4 0 0 0 0 ... 1 1 1 0 \n", ".. ... ... ... ... ... ... ... ... ... \n", "195 0 1 0 0 ... 1 1 1 0 \n", "196 0 1 0 0 ... 1 0 1 0 \n", "197 0 0 1 0 ... 1 1 0 0 \n", "198 0 0 1 0 ... 1 0 1 0 \n", "199 0 0 1 0 ... 1 1 1 0 \n", "\n", " 3players 4players 5players map1 map2 map3 \n", "0 0 0 0 1 0 0 \n", "1 0 1 0 0 0 1 \n", "2 0 1 0 1 0 0 \n", "3 0 0 1 1 0 0 \n", "4 0 1 0 0 1 0 \n", ".. ... ... ... ... ... ... \n", "195 0 1 0 1 0 0 \n", "196 0 1 0 1 0 0 \n", "197 0 1 0 1 0 0 \n", "198 0 1 0 1 0 0 \n", "199 0 1 0 1 0 0 \n", "\n", "[200 rows x 72 columns]" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "featdf" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Looks good! We can soon do it with the full set of data... after we've done some cleaning.\n", "\n", "## Cleaning and filtering the data \n", "\n", "Filtering bad data. We'll need to check... \n", "1. gameevents have the same game list as games - DONE\n", "2. the game has finished - DONE - more naunced: dropouts will change the feature, \"no. of players\". Therefore \"no. of players\" will be assigned to the end no. of players (after dropout) \n", "3. the map used (only use a sub-set of maps, or we could one-hot a list of accepted maps in X (features) - DONE\n", "4. additional rules? Look up - left for now as v bitty: will add a bit of noise but not a lot\n", "5. Number of players. Possible for 2, 3, 4 or 5 player games, we could one-hot the differences as there might be similarities in different players that can contribute to predictions between different player games. Although >5 players should be removed. - DONE\n", "6. No extra factions (shapeshifters etc) - more naunced: these will be filtered out at the stage of making the data, using \"valid factions\". However we don't want to filter out any games where a valid faction is playing against an extra faction (we just won't include that faction's vp data). So we include it in the \"player dropped\" group." ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "data = dict()\n", "data['gameevents'] = gameevents\n", "data['games'] = games\n", "data['gamescoringtiles'] = gamescoringtiles" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "def filteringByBadgames(data, badgames):\n", " \"\"\" Data is a dict containing gameevents, games, gamescoringtiles\n", " badgames is a pd.dataframe that contains ['game'] to filter by\n", " \"\"\"\n", " gameeventsfil = data['gameevents']\n", " gamesfil = data['games']\n", " gamescoringtilesfil = data['gamescoringtiles']\n", " \n", " badgameslist = badgames['game']\n", " gameeventsfilbefore = len(gameeventsfil)\n", " gamesbefore = len(gamesfil)\n", " gameSTbefore = len(gamescoringtilesfil)\n", "\n", " gameeventsfil = gameeventsfil[~gameeventsfil['game'].isin(badgameslist)]\n", " gamesfil = gamesfil[~gamesfil['game'].isin(badgameslist)]\n", " gamescoringtilesfil = gamescoringtilesfil[~gamescoringtilesfil['game'].isin(badgameslist)]\n", "\n", " print(f'game events before: {gameeventsfilbefore}, game events after: {len(gameeventsfil)}, game events removed: {gameeventsfilbefore-len(gameeventsfil)}')\n", " print(f'games before: {gamesbefore}, games after: {len(gamesfil)}, games removed: {gamesbefore-len(gamesfil)}')\n", " print(f'gameST before: {gameSTbefore}, gameST after: {len(gamescoringtilesfil)}, games removed: {gameSTbefore-len(gamescoringtilesfil)}')\n", " \n", " data['gameevents'] = gameeventsfil\n", " data['games'] = gamesfil\n", " data['gamescoringtiles'] = gamescoringtilesfil\n", " \n", " return data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `Gameevents` games are same as `games` games" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# games from game_events are in games:\n", "gamesymmetricdif = set(gameslist) ^ set(games['game'])\n", "len(gamesymmetricdif)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "So we have all the same games. Cool." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Removing the 1, 6, 7 player games\n", "This was a problem noted here: https://www.kaggle.com/lemonkoala/some-faulty-data" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
base_mapgamelast_updateplayer_count
1267be8f6ebf549404d015547152d5f2a1906ae8dd90HenriMustGoDown2015-07-10 06:03:396
3110be8f6ebf549404d015547152d5f2a1906ae8dd907pmatch32015-11-23 17:34:267
3896be8f6ebf549404d015547152d5f2a1906ae8dd90GTMDHXD2015-11-02 05:54:087
476095a66999127893f5925a5f591d54f8bcb9a670e6seven112015-11-26 10:38:097
5728be8f6ebf549404d015547152d5f2a1906ae8dd906playerGregvlwonthelastone2016-01-11 14:43:196
...............
112011126fe960806d587c78546b30f1a90853b1ada4687pmatch2015-08-22 14:35:387
112930be8f6ebf549404d015547152d5f2a1906ae8dd90MEGA2KING2015-08-26 12:21:046
113411126fe960806d587c78546b30f1a90853b1ada468ScruffyLookingNerfHerder2015-08-18 02:13:186
113813be8f6ebf549404d015547152d5f2a1906ae8dd90youmi0232015-08-27 08:07:337
113846126fe960806d587c78546b30f1a90853b1ada468YutoriGoGo0092015-08-29 00:32:157
\n", "

83 rows × 4 columns

\n", "
" ], "text/plain": [ " base_map game \\\n", "1267 be8f6ebf549404d015547152d5f2a1906ae8dd90 HenriMustGoDown \n", "3110 be8f6ebf549404d015547152d5f2a1906ae8dd90 7pmatch3 \n", "3896 be8f6ebf549404d015547152d5f2a1906ae8dd90 GTMDHXD \n", "4760 95a66999127893f5925a5f591d54f8bcb9a670e6 seven11 \n", "5728 be8f6ebf549404d015547152d5f2a1906ae8dd90 6playerGregvlwonthelastone \n", "... ... ... \n", "112011 126fe960806d587c78546b30f1a90853b1ada468 7pmatch \n", "112930 be8f6ebf549404d015547152d5f2a1906ae8dd90 MEGA2KING \n", "113411 126fe960806d587c78546b30f1a90853b1ada468 ScruffyLookingNerfHerder \n", "113813 be8f6ebf549404d015547152d5f2a1906ae8dd90 youmi023 \n", "113846 126fe960806d587c78546b30f1a90853b1ada468 YutoriGoGo009 \n", "\n", " last_update player_count \n", "1267 2015-07-10 06:03:39 6 \n", "3110 2015-11-23 17:34:26 7 \n", "3896 2015-11-02 05:54:08 7 \n", "4760 2015-11-26 10:38:09 7 \n", "5728 2016-01-11 14:43:19 6 \n", "... ... ... \n", "112011 2015-08-22 14:35:38 7 \n", "112930 2015-08-26 12:21:04 6 \n", "113411 2015-08-18 02:13:18 6 \n", "113813 2015-08-27 08:07:33 7 \n", "113846 2015-08-29 00:32:15 7 \n", "\n", "[83 rows x 4 columns]" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "badgames = games[games[\"player_count\"].isin([1, 6, 7])]\n", "badgames" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "game events before: 73419074, game events after: 73332500, game events removed: 86574\n", "games before: 115612, games after: 115529, games removed: 83\n", "gameST before: 693660, gameST after: 693162, games removed: 498\n" ] } ], "source": [ "data = filteringByBadgames(data, badgames)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Checking the game has finished\n", "To check this, we can look at game events. If every player has passed their bonus tile on round 6, they've all made it to the end. If not, they've dropped out." ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['action:ACT1', 'action:ACT2', 'action:ACT4', 'action:FAV6',\n", " 'advance:ship', 'bridge', 'build:D', 'burn', 'decline:count',\n", " 'decline:pw', 'dig', 'favor:FAV12', 'favor:any', 'leech:count',\n", " 'leech:pw', 'order:1', 'order:2', 'order:3', 'order:4', 'town:TW1',\n", " 'town:TW2', 'town:TW5', 'town:any', 'upgrade:TE', 'upgrade:TP',\n", " 'vp'], dtype=object)" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "singlegame = gameevents[gameevents['game'] == 'NewbiesWelcome12']\n", "r6 = singlegame[singlegame['round'] == 6]\n", "uniqueevents = np.sort(pd.unique(r6['event']))\n", "uniqueevents" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "There's no pass logged in round 6?! What?! What about round 5?" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['action:ACT1', 'action:ACT3', 'action:ACT6', 'action:ACTS',\n", " 'action:BON1', 'action:FAV6', 'bridge', 'build:D', 'burn', 'dig',\n", " 'favor:FAV1', 'favor:FAV10', 'favor:FAV5', 'favor:FAV6',\n", " 'favor:any', 'leech:count', 'leech:pw', 'order:1', 'order:2',\n", " 'order:3', 'order:4', 'pass:BON2', 'pass:BON4', 'pass:BON7',\n", " 'town:TW3', 'town:any', 'upgrade:SA', 'upgrade:TE', 'upgrade:TP',\n", " 'vp'], dtype=object)" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "singlegame = gameevents[gameevents['game'] == 'NewbiesWelcome12']\n", "r5 = singlegame[singlegame['round'] == 5]\n", "uniqueevents = np.sort(pd.unique(r5['event']))\n", "uniqueevents" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4388095 False\n", "4388096 False\n", "4388107 False\n", "4388109 False\n", "4388119 False\n", " ... \n", "4388605 False\n", "4388607 False\n", "4388612 False\n", "4388618 False\n", "4388619 False\n", "Name: event, Length: 92, dtype: bool" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "r6['event'].str.startswith('pass')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Okay. So there is in round 5 but not in round 6. Not a fool-proof way of checking no-one has dropped out but you'd hope if they made it to round 6, they wouldn't drop out then. Very annoying, we'll just base it off round 5 passes." ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "faction: swarmlings ended their turn?: True\n", "faction: halflings ended their turn?: True\n", "faction: engineers ended their turn?: False\n", "faction: witches ended their turn?: True\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gamenodropsstartplayersplayersdropped
0NewbiesWelcome12False41
\n", "
" ], "text/plain": [ " game nodrops startplayers playersdropped\n", "0 NewbiesWelcome12 False 4 1" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# sense-check - every player passed final round except engineers in this game\n", "singlegame = gameevents[gameevents['game'] == 'NewbiesWelcome12']\n", "\n", "extendedfactions = validfactions + ['dragonlords', 'riverwalkers', 'yetis', 'icemaidens', 'shapeshifters', 'acolytes']\n", "\n", "def check_game_ended(singlegame, verbose=False):\n", " r5 = singlegame[singlegame['round'] == 5]\n", " rawfactions = pd.unique(singlegame['faction'])\n", " verifiedfactions = [rawfaction for rawfaction in rawfactions if rawfaction in extendedfactions]\n", " allbool = []\n", "\n", " for faction in verifiedfactions:\n", " factionbool = len(r5[(r5['faction'] == faction) & (r5['event'].str.startswith('pass'))]) == 1\n", " allbool.append(factionbool)\n", " \n", " if verbose:\n", " print(f'faction: {faction} ended their turn?: {factionbool}')\n", " \n", " isgood = all(allbool)\n", " startplayers = len(verifiedfactions)\n", " boolsum = sum(allbool)\n", " playersdropped = startplayers - boolsum\n", " \n", " return isgood, startplayers, playersdropped\n", "\n", "isgood, startplayers, playersdropped = check_game_ended(singlegame, verbose=True)\n", "newdf = pd.DataFrame([['NewbiesWelcome12', isgood, startplayers, playersdropped]], columns=['game', 'nodrops', 'startplayers', 'playersdropped'])\n", "newdf" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We'll also need to check there even *is* a round 5 (or a round 6). If there isn't... clearly the game hasn't been finished. We might as well check nothing odd is going on and there isn't a round 7/8/9 either." ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Progressed to 10000th game\n", "Progressed to 20000th game\n", "Progressed to 30000th game\n", "Progressed to 40000th game\n", "Progressed to 50000th game\n", "Progressed to 60000th game\n", "Progressed to 70000th game\n", "Progressed to 80000th game\n", "Progressed to 90000th game\n", "Progressed to 100000th game\n", "Progressed to 110000th game\n", "no of unique games in table is: 115528\n" ] } ], "source": [ "gameevents = data['gameevents']\n", "gameslist = list(pd.unique(gameevents['game']))\n", "\n", "gamelengthlen = len(gameslist)\n", "gamesroundup = math.ceil(gamelengthlen / 100.0) * 100\n", "jj = 0\n", "playerdropdf = pd.DataFrame(columns=['game', 'nodrops', 'startplayers', 'playersdropped'])\n", "\n", "for ii in range(100, gamesroundup+1, 100): \n", " ii = min(ii, gamelengthlen) \n", " if (ii % 10000) == 0: # update every 10000 games\n", " print(f'Progressed to {ii}th game')\n", " \n", " next100games = gameslist[jj:ii]\n", " jj = ii # so that we don't get any repetitions at the very end, where our set will be smaller\n", " \n", " gameevents100 = gameevents[gameevents['game'].isin(next100games)]\n", "\n", " for game in next100games:\n", " singlegame = gameevents100[gameevents100['game'] == game]\n", " \n", " if not len(singlegame) == 0:\n", " isgood, startplayers, playersdropped = check_game_ended(singlegame)\n", " newdf = pd.DataFrame([[game, isgood, startplayers, playersdropped]], columns=['game', 'nodrops', 'startplayers', 'playersdropped'])\n", " playerdropdf = playerdropdf.append(newdf, ignore_index=True)\n", "\n", "print(f\"no of unique games in table is: {len(list(pd.unique(playerdropdf['game'])))}\")" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 100147\n", "1 11975\n", "2 2720\n", "3 530\n", "4 133\n", "5 23\n", "Name: playersdropped, dtype: int64" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "playerdropdf['playersdropped'].value_counts()" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "playerdropdf['endplayers'] = playerdropdf['startplayers'] - playerdropdf['playersdropped']" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4 57599\n", "3 25708\n", "2 23374\n", "5 6057\n", "1 1921\n", "0 869\n", "Name: endplayers, dtype: int64" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "playerdropdf['endplayers'].value_counts()" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "playerdropdf.to_csv('D://PycharmProjects/TerraBot/data/faction-picker-bot/unfinishedgames.csv')" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "# for coming back to edit\n", "playerdropdf = pd.read_csv('D://PycharmProjects/TerraBot/data/faction-picker-bot/unfinishedgames.csv')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Remove all the games which end up with 0 or 1 player." ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0gamenodropsstartplayersplayersdroppedendplayers
000000000001True202
11050615True404
220512True404
33051501True505
44060303True404
.....................
115523115523YoureacrookcaptianhookFalse312
115524115524YourMomIsAChaosMagicianTrue303
115525115525ytuwertqwtrTrue202
115526115526yuertyqertTrue202
115527115527ZeicheMasZeuchnisTrue303
\n", "

115528 rows × 6 columns

\n", "
" ], "text/plain": [ " Unnamed: 0 game nodrops startplayers \\\n", "0 0 0000000001 True 2 \n", "1 1 050615 True 4 \n", "2 2 0512 True 4 \n", "3 3 051501 True 5 \n", "4 4 060303 True 4 \n", "... ... ... ... ... \n", "115523 115523 Youreacrookcaptianhook False 3 \n", "115524 115524 YourMomIsAChaosMagician True 3 \n", "115525 115525 ytuwertqwtr True 2 \n", "115526 115526 yuertyqert True 2 \n", "115527 115527 ZeicheMasZeuchnis True 3 \n", "\n", " playersdropped endplayers \n", "0 0 2 \n", "1 0 4 \n", "2 0 4 \n", "3 0 5 \n", "4 0 4 \n", "... ... ... \n", "115523 1 2 \n", "115524 0 3 \n", "115525 0 2 \n", "115526 0 2 \n", "115527 0 3 \n", "\n", "[115528 rows x 6 columns]" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "playerdropdf" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "game events before: 73332500, game events after: 72827570, game events removed: 504930\n", "games before: 115529, games after: 112739, games removed: 2790\n", "gameST before: 693162, gameST after: 676434, games removed: 16728\n" ] } ], "source": [ "badgames = playerdropdf[playerdropdf['endplayers'].isin([0, 1])]\n", "data = filteringByBadgames(data, badgames)" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2790\n" ] } ], "source": [ "print(len(playerdropdf[playerdropdf['endplayers'].isin([0, 1])]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Removing unwanted maps\n", "We want these maps: \n", "126fe960806d587c78546b30f1a90853b1ada468 - map1 \n", "95a66999127893f5925a5f591d54f8bcb9a670e6 - map2 \n", "be8f6ebf549404d015547152d5f2a1906ae8dd90 - map3" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "game events before: 72827570, game events after: 72067832, game events removed: 759738\n", "games before: 112739, games after: 111547, games removed: 1192\n", "gameST before: 676434, gameST after: 669282, games removed: 7152\n" ] } ], "source": [ "acceptablemaps = ['126fe960806d587c78546b30f1a90853b1ada468', \n", " '95a66999127893f5925a5f591d54f8bcb9a670e6', \n", " 'be8f6ebf549404d015547152d5f2a1906ae8dd90']\n", "\n", "badgames = games[~games[\"base_map\"].isin(acceptablemaps)]\n", "data = filteringByBadgames(data, badgames)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Creating the full dataset " ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Progressed to 10000th game\n", "Progressed to 20000th game\n", "Progressed to 30000th game\n", "Progressed to 40000th game\n", "Progressed to 50000th game\n", "Progressed to 60000th game\n", "Progressed to 70000th game\n", "Progressed to 80000th game\n", "Progressed to 90000th game\n", "Progressed to 100000th game\n", "Progressed to 110000th game\n", "no of unique games in table is: 111546\n" ] } ], "source": [ "vpdf, _, _ = makenewdf()\n", "featdf, _ = emptyfeaturesdf()\n", "\n", "gameevents = data['gameevents']\n", "games = data['games']\n", "gamescoringtiles = data['gamescoringtiles']\n", "\n", "gameslist = list(pd.unique(gameevents['game']))\n", "gamelengthlen = len(gameslist)\n", "gamesroundup = math.ceil(gamelengthlen / 100.0) * 100\n", "jj = 0\n", "\n", "for ii in range(100, gamesroundup+1, 100): \n", " ii = min(ii, gamelengthlen) \n", " if (ii % 10000) == 0: # update every 10000 games\n", " print(f'Progressed to {ii}th game')\n", " \n", " next100games = gameslist[jj:ii]\n", " jj = ii # so that we don't get any repetitions at the very end, where our set will be smaller\n", " \n", " gameevents100 = gameevents[gameevents['game'].isin(next100games)]\n", " gamemeta100 = games[games['game'].isin(next100games)]\n", " gameST100 = gamescoringtiles[gamescoringtiles['game'].isin(next100games)]\n", " endplayers100 = playerdropdf[playerdropdf['game'].isin(next100games)] # use this for player count\n", "\n", " for game in next100games:\n", " singlegame = gameevents100[gameevents100['game'] == game]\n", " singlegamemeta = gamemeta100[gamemeta100['game'] == game]\n", " singlegameST = gameST100[gameST100['game'] == game]\n", " singleendplayers = endplayers100[endplayers100['game'] == game]\n", " \n", " if not len(singlegame) == 0:\n", " vpforgame = get_vp_from_game(singlegame)\n", " featsforgame = get_features_from_game(singlegame, singlegamemeta, singlegameST, singleendplayers=singleendplayers)\n", " \n", " vpdf = vpdf.append(vpforgame, ignore_index=True)\n", " featdf = featdf.append(featsforgame, ignore_index=True)\n", "\n", "print(f\"no of unique games in table is: {len(list(pd.unique(vpdf['game'])))}\")\n" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gamewitchesaurenswarmlingsmermaidscultistshalflingsdwarvesengineerschaosmagiciansgiantsfakirsnomadsdarklingsalchemists
00000000001NaNNaNNaN118.0NaN62.0NaNNaNNaNNaNNaNNaNNaNNaN
1050615NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN122.0NaN
20512NaNNaNNaNNaNNaNNaNNaN124.0101.0NaNNaNNaNNaNNaN
3051501NaN84.0NaNNaNNaN77.0NaNNaN72.0NaNNaN54.078.0NaN
4060303NaNNaNNaN108.0NaNNaNNaN121.0NaNNaN78.0NaN128.0NaN
................................................
111541YoureacrookcaptianhookNaNNaN126.0NaNNaN105.0NaNNaNNaNNaNNaN37.0NaNNaN
111542YourMomIsAChaosMagician112.0NaN114.0NaNNaNNaNNaNNaNNaNNaNNaN120.0NaNNaN
111543ytuwertqwtrNaNNaN105.0NaNNaNNaN102.0NaNNaNNaNNaNNaNNaNNaN
111544yuertyqert128.0NaNNaNNaNNaNNaNNaNNaN141.0NaNNaNNaNNaNNaN
111545ZeicheMasZeuchnisNaNNaNNaNNaNNaNNaN132.0NaNNaNNaNNaN88.0120.0NaN
\n", "

111546 rows × 15 columns

\n", "
" ], "text/plain": [ " game witches auren swarmlings mermaids \\\n", "0 0000000001 NaN NaN NaN 118.0 \n", "1 050615 NaN NaN NaN NaN \n", "2 0512 NaN NaN NaN NaN \n", "3 051501 NaN 84.0 NaN NaN \n", "4 060303 NaN NaN NaN 108.0 \n", "... ... ... ... ... ... \n", "111541 Youreacrookcaptianhook NaN NaN 126.0 NaN \n", "111542 YourMomIsAChaosMagician 112.0 NaN 114.0 NaN \n", "111543 ytuwertqwtr NaN NaN 105.0 NaN \n", "111544 yuertyqert 128.0 NaN NaN NaN \n", "111545 ZeicheMasZeuchnis NaN NaN NaN NaN \n", "\n", " cultists halflings dwarves engineers chaosmagicians giants \\\n", "0 NaN 62.0 NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN NaN NaN \n", "2 NaN NaN NaN 124.0 101.0 NaN \n", "3 NaN 77.0 NaN NaN 72.0 NaN \n", "4 NaN NaN NaN 121.0 NaN NaN \n", "... ... ... ... ... ... ... \n", "111541 NaN 105.0 NaN NaN NaN NaN \n", "111542 NaN NaN NaN NaN NaN NaN \n", "111543 NaN NaN 102.0 NaN NaN NaN \n", "111544 NaN NaN NaN NaN 141.0 NaN \n", "111545 NaN NaN 132.0 NaN NaN NaN \n", "\n", " fakirs nomads darklings alchemists \n", "0 NaN NaN NaN NaN \n", "1 NaN NaN 122.0 NaN \n", "2 NaN NaN NaN NaN \n", "3 NaN 54.0 78.0 NaN \n", "4 78.0 NaN 128.0 NaN \n", "... ... ... ... ... \n", "111541 NaN 37.0 NaN NaN \n", "111542 NaN 120.0 NaN NaN \n", "111543 NaN NaN NaN NaN \n", "111544 NaN NaN NaN NaN \n", "111545 NaN 88.0 120.0 NaN \n", "\n", "[111546 rows x 15 columns]" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vpdf" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gamer1_SCORE1r1_SCORE2r1_SCORE3r1_SCORE4r1_SCORE5r1_SCORE6r1_SCORE7r1_SCORE8r1_SCORE9...BON8BON9BON102players3players4players5playersmap1map2map3
00000000001000000010...0111000100
1050615000010000...1100010001
20512000010000...1100010100
3051501000000010...1000001100
4060303010000000...1110010010
..................................................................
111541Youreacrookcaptianhook000001000...0101000100
111542YourMomIsAChaosMagician001000000...1100100100
111543ytuwertqwtr100000000...1111000100
111544yuertyqert000100000...1011000100
111545ZeicheMasZeuchnis001000000...1100100100
\n", "

111546 rows × 72 columns

\n", "
" ], "text/plain": [ " game r1_SCORE1 r1_SCORE2 r1_SCORE3 r1_SCORE4 \\\n", "0 0000000001 0 0 0 0 \n", "1 050615 0 0 0 0 \n", "2 0512 0 0 0 0 \n", "3 051501 0 0 0 0 \n", "4 060303 0 1 0 0 \n", "... ... ... ... ... ... \n", "111541 Youreacrookcaptianhook 0 0 0 0 \n", "111542 YourMomIsAChaosMagician 0 0 1 0 \n", "111543 ytuwertqwtr 1 0 0 0 \n", "111544 yuertyqert 0 0 0 1 \n", "111545 ZeicheMasZeuchnis 0 0 1 0 \n", "\n", " r1_SCORE5 r1_SCORE6 r1_SCORE7 r1_SCORE8 r1_SCORE9 ... BON8 BON9 BON10 \\\n", "0 0 0 0 1 0 ... 0 1 1 \n", "1 1 0 0 0 0 ... 1 1 0 \n", "2 1 0 0 0 0 ... 1 1 0 \n", "3 0 0 0 1 0 ... 1 0 0 \n", "4 0 0 0 0 0 ... 1 1 1 \n", "... ... ... ... ... ... ... ... ... ... \n", "111541 0 1 0 0 0 ... 0 1 0 \n", "111542 0 0 0 0 0 ... 1 1 0 \n", "111543 0 0 0 0 0 ... 1 1 1 \n", "111544 0 0 0 0 0 ... 1 0 1 \n", "111545 0 0 0 0 0 ... 1 1 0 \n", "\n", " 2players 3players 4players 5players map1 map2 map3 \n", "0 1 0 0 0 1 0 0 \n", "1 0 0 1 0 0 0 1 \n", "2 0 0 1 0 1 0 0 \n", "3 0 0 0 1 1 0 0 \n", "4 0 0 1 0 0 1 0 \n", "... ... ... ... ... ... ... ... \n", "111541 1 0 0 0 1 0 0 \n", "111542 0 1 0 0 1 0 0 \n", "111543 1 0 0 0 1 0 0 \n", "111544 1 0 0 0 1 0 0 \n", "111545 0 1 0 0 1 0 0 \n", "\n", "[111546 rows x 72 columns]" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "featdf" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "vpdf.to_csv('D://PycharmProjects/TerraBot/data/faction-picker-bot/vpdata.csv')\n", "featdf.to_csv('D://PycharmProjects/TerraBot/data/faction-picker-bot/featdata.csv')" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# check we still have the same games:\n", "gamesymmetricdif = set(vpdf['game']) ^ set(featdf['game'])\n", "len(gamesymmetricdif)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 4 }