{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "3fc7b6bb-9320-4dd4-82b8-90f5055c6ddc", "metadata": {}, "outputs": [], "source": [ "\n", "import pandas as pd\n", "import numpy as np " ] }, { "cell_type": "code", "execution_count": 116, "id": "0551b2bb-8a40-41c3-8290-7a04cf10f2f4", "metadata": {}, "outputs": [], "source": [ "import pickle" ] }, { "cell_type": "code", "execution_count": 117, "id": "0b9e8bfe-1fb6-4e46-a89c-35e9199a060b", "metadata": {}, "outputs": [], "source": [ "# Load the pickled DataFrame. The context manager closes the file handle as soon as\n", "# loading finishes instead of leaving it open until garbage collection.\n", "# NOTE(review): pickle.load can execute arbitrary code - only load files from a trusted source.\n", "with open('dataset_level2.pkl', 'rb') as fh:\n", "    df = pickle.load(fh)" ] }, { "cell_type": "code", "execution_count": 118, "id": "912a730f-2918-4ee9-98e4-d281be103461", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
match_idbatting_teambowling_teamballrunsplayer_dismissedcityvenue
02South AfricaEngland0.100DurbanKingsmead
12South AfricaEngland0.210DurbanKingsmead
22South AfricaEngland0.300DurbanKingsmead
32South AfricaEngland0.400DurbanKingsmead
42South AfricaEngland0.500DurbanKingsmead
...........................
5196411800AfghanistanBangladesh49.240Abu DhabiSheikh Zayed Stadium
5196421800AfghanistanBangladesh49.340Abu DhabiSheikh Zayed Stadium
5196431800AfghanistanBangladesh49.420Abu DhabiSheikh Zayed Stadium
5196441800AfghanistanBangladesh49.540Abu DhabiSheikh Zayed Stadium
5196451800AfghanistanBangladesh49.640Abu DhabiSheikh Zayed Stadium
\n", "

312984 rows × 8 columns

\n", "
" ], "text/plain": [ " match_id batting_team bowling_team ball runs player_dismissed \\\n", "0 2 South Africa England 0.1 0 0 \n", "1 2 South Africa England 0.2 1 0 \n", "2 2 South Africa England 0.3 0 0 \n", "3 2 South Africa England 0.4 0 0 \n", "4 2 South Africa England 0.5 0 0 \n", "... ... ... ... ... ... ... \n", "519641 1800 Afghanistan Bangladesh 49.2 4 0 \n", "519642 1800 Afghanistan Bangladesh 49.3 4 0 \n", "519643 1800 Afghanistan Bangladesh 49.4 2 0 \n", "519644 1800 Afghanistan Bangladesh 49.5 4 0 \n", "519645 1800 Afghanistan Bangladesh 49.6 4 0 \n", "\n", " city venue \n", "0 Durban Kingsmead \n", "1 Durban Kingsmead \n", "2 Durban Kingsmead \n", "3 Durban Kingsmead \n", "4 Durban Kingsmead \n", "... ... ... \n", "519641 Abu Dhabi Sheikh Zayed Stadium \n", "519642 Abu Dhabi Sheikh Zayed Stadium \n", "519643 Abu Dhabi Sheikh Zayed Stadium \n", "519644 Abu Dhabi Sheikh Zayed Stadium \n", "519645 Abu Dhabi Sheikh Zayed Stadium \n", "\n", "[312984 rows x 8 columns]" ] }, "execution_count": 118, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 119, "id": "3b70cfe7-da9a-4a3c-8125-46d9cdfbda00", "metadata": {}, "outputs": [], "source": [ "#batting_team\n", "#bowling_team\n", "#city\n", "#current_score\n", "#balls left\n", "#wickets left\n", "#current rr\n", "#last five" ] }, { "cell_type": "code", "execution_count": 120, "id": "b4b99484-45f9-478f-bbdc-d347fae05fdc", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "match_id 0\n", "batting_team 0\n", "bowling_team 0\n", "ball 0\n", "runs 0\n", "player_dismissed 0\n", "city 56082\n", "venue 0\n", "dtype: int64" ] }, "execution_count": 120, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 121, "id": "837239ee-1d89-41bb-886d-cc09391dc17a", "metadata": {}, "outputs": [], "source": [ "#56082 rows me values missing hain" ] }, { "cell_type": "code", 
"execution_count": 122, "id": "f531afaa-101b-4677-9981-e88c4a2a10a2", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
match_idbatting_teambowling_teamballrunsplayer_dismissedcityvenue
312413AustraliaSouth Africa0.100NaNMelbourne Cricket Ground
312513AustraliaSouth Africa0.200NaNMelbourne Cricket Ground
312613AustraliaSouth Africa0.310NaNMelbourne Cricket Ground
312713AustraliaSouth Africa0.410NaNMelbourne Cricket Ground
312813AustraliaSouth Africa0.510NaNMelbourne Cricket Ground
...........................
5172451792AustraliaSouth Africa49.210NaNHarare Sports Club
5172461792AustraliaSouth Africa49.320NaNHarare Sports Club
5172471792AustraliaSouth Africa49.40JP FaulknerNaNHarare Sports Club
5172481792AustraliaSouth Africa49.520NaNHarare Sports Club
5172491792AustraliaSouth Africa49.600NaNHarare Sports Club
\n", "

56082 rows × 8 columns

\n", "
" ], "text/plain": [ " match_id batting_team bowling_team ball runs player_dismissed city \\\n", "3124 13 Australia South Africa 0.1 0 0 NaN \n", "3125 13 Australia South Africa 0.2 0 0 NaN \n", "3126 13 Australia South Africa 0.3 1 0 NaN \n", "3127 13 Australia South Africa 0.4 1 0 NaN \n", "3128 13 Australia South Africa 0.5 1 0 NaN \n", "... ... ... ... ... ... ... ... \n", "517245 1792 Australia South Africa 49.2 1 0 NaN \n", "517246 1792 Australia South Africa 49.3 2 0 NaN \n", "517247 1792 Australia South Africa 49.4 0 JP Faulkner NaN \n", "517248 1792 Australia South Africa 49.5 2 0 NaN \n", "517249 1792 Australia South Africa 49.6 0 0 NaN \n", "\n", " venue \n", "3124 Melbourne Cricket Ground \n", "3125 Melbourne Cricket Ground \n", "3126 Melbourne Cricket Ground \n", "3127 Melbourne Cricket Ground \n", "3128 Melbourne Cricket Ground \n", "... ... \n", "517245 Harare Sports Club \n", "517246 Harare Sports Club \n", "517247 Harare Sports Club \n", "517248 Harare Sports Club \n", "517249 Harare Sports Club \n", "\n", "[56082 rows x 8 columns]" ] }, "execution_count": 122, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df['city'].isnull()]\n", "#vo rows jinme city ka value missing hai" ] }, { "cell_type": "code", "execution_count": 123, "id": "8a5fd13e-b679-472a-bdce-b6221f3757a6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "venue\n", "Sydney Cricket Ground 9595\n", "Rangiri Dambulla International Stadium 9450\n", "Melbourne Cricket Ground 7778\n", "Adelaide Oval 7166\n", "Dubai International Cricket Stadium 6314\n", "Pallekele International Cricket Stadium 5945\n", "Sharjah Cricket Stadium 4291\n", "Harare Sports Club 1538\n", "Chittagong Divisional Stadium 900\n", "Multan Cricket Stadium 883\n", "Rawalpindi Cricket Stadium 617\n", "Perth Stadium 526\n", "Dubai Sports City Cricket Stadium 518\n", "Sharjah Cricket Association Stadium 303\n", "Queenstown Events Centre 258\n", "Name: count, dtype: int64" ] }, 
"execution_count": 123, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df['city'].isnull()]['venue'].value_counts()" ] }, { "cell_type": "code", "execution_count": 124, "id": "d5ef8ba3-5452-4b7a-ac7d-88405c15d1c5", "metadata": {}, "outputs": [], "source": [ "cities =np.where(df['city'].isnull(), df['venue'].str.split().apply(lambda x: x[0]), df['city'])\n", "#agar null value hai to df ke venue me ghuskr usme str.split ko call krke lambda apply..isse stadium se first word niklke aajayega\n", "# agar df['city'].isnull() ye true h to df['venue'].str.split().apply(lambda x: x[0]) ye kro else df['city'] ye fill krdo\n", "#agar city ka value missing to first word of stadium else vaise hi rehne do" ] }, { "cell_type": "code", "execution_count": 125, "id": "b1561b7e-e654-4846-83dc-e368259a2736", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 [Kingsmead]\n", "1 [Kingsmead]\n", "2 [Kingsmead]\n", "3 [Kingsmead]\n", "4 [Kingsmead]\n", " ... \n", "519641 [Sheikh, Zayed, Stadium]\n", "519642 [Sheikh, Zayed, Stadium]\n", "519643 [Sheikh, Zayed, Stadium]\n", "519644 [Sheikh, Zayed, Stadium]\n", "519645 [Sheikh, Zayed, Stadium]\n", "Name: venue, Length: 312984, dtype: object" ] }, "execution_count": 125, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['venue'].str.split()\n", "#split hogyi values" ] }, { "cell_type": "code", "execution_count": 126, "id": "5a3bdf6b-c9b9-4ba4-be0a-458d789e345e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 Kingsmead\n", "1 Kingsmead\n", "2 Kingsmead\n", "3 Kingsmead\n", "4 Kingsmead\n", " ... 
\n", "519641 Sheikh\n", "519642 Sheikh\n", "519643 Sheikh\n", "519644 Sheikh\n", "519645 Sheikh\n", "Name: venue, Length: 312984, dtype: object" ] }, "execution_count": 126, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['venue'].str.split().apply(lambda x: x[0])\n", "#sbka first word nikaal lia" ] }, { "cell_type": "code", "execution_count": 127, "id": "b071753f-b471-447d-9fb1-79585df594e6", "metadata": {}, "outputs": [], "source": [ "df['city']=cities" ] }, { "cell_type": "code", "execution_count": 128, "id": "aabf34fc-b12f-4972-ad21-b9224c33fc6e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "match_id 0\n", "batting_team 0\n", "bowling_team 0\n", "ball 0\n", "runs 0\n", "player_dismissed 0\n", "city 0\n", "venue 0\n", "dtype: int64" ] }, "execution_count": 128, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 129, "id": "9e61d71d-9bfc-4b1c-a837-8da2cf76a9f3", "metadata": {}, "outputs": [], "source": [ "df.drop(columns=['venue'],inplace=True)" ] }, { "cell_type": "code", "execution_count": 130, "id": "d2fdd769-7140-4940-a7be-694eb0853b88", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "city\n", "Mirpur 18663\n", "London 16581\n", "Colombo 15967\n", "Sydney 10205\n", "Abu Dhabi 10003\n", " ... 
\n", "Peshawar 307\n", "Faridabad 305\n", "Bulawayo 302\n", "Jamshedpur 300\n", "Dharmasala 271\n", "Name: count, Length: 97, dtype: int64" ] }, "execution_count": 130, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['city'].value_counts()" ] }, { "cell_type": "code", "execution_count": 131, "id": "6630034d-a13f-4df3-855b-830829a1280d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "city\n", "Mirpur 18663\n", "London 16581\n", "Colombo 15967\n", "Sydney 10205\n", "Abu Dhabi 10003\n", "Rangiri 9450\n", "Melbourne 8696\n", "Centurion 8091\n", "Adelaide 7472\n", "Dubai 7447\n", "Birmingham 6976\n", "Perth 6906\n", "Auckland 6204\n", "Cardiff 5956\n", "Pallekele 5945\n", "Brisbane 5904\n", "Wellington 5801\n", "Johannesburg 5578\n", "Hamilton 5295\n", "Sharjah 5202\n", "Chandigarh 5172\n", "Cape Town 5169\n", "Durban 5034\n", "Southampton 5008\n", "Nottingham 4996\n", "Manchester 4966\n", "Port Elizabeth 4948\n", "Leeds 4616\n", "Karachi 4514\n", "Napier 4107\n", "Christchurch 4060\n", "Hambantota 3701\n", "Hobart 3653\n", "Chester-le-Street 3604\n", "Nagpur 3023\n", "Mumbai 2995\n", "Lahore 2785\n", "Mount Maunganui 2721\n", "Chittagong 2623\n", "Delhi 2507\n", "Fatullah 2414\n", "Dunedin 2147\n", "Rajkot 2113\n", "Jaipur 2048\n", "Kolkata 1949\n", "Nelson 1904\n", "Ahmedabad 1846\n", "Hyderabad 1842\n", "Bristol 1789\n", "Canberra 1541\n", "Harare 1538\n", "Kanpur 1536\n", "Bloemfontein 1533\n", "Name: count, dtype: int64" ] }, "execution_count": 131, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['city'].value_counts()[df['city'].value_counts() > 1500]" ] }, { "cell_type": "code", "execution_count": 132, "id": "49365fd5-f8ec-47e5-98c1-8d3623f801dc", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(53,)" ] }, "execution_count": 132, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['city'].value_counts()[df['city'].value_counts() > 1500].shape\n", "#53 cities" ] }, { "cell_type": 
"code", "execution_count": 133, "id": "6fd30596-d121-4fd9-9439-ba580d95e35b", "metadata": {}, "outputs": [], "source": [ "# Collect the names of cities that have more than 1500 rows; count once and reuse the result.\n", "city_counts = df['city'].value_counts()\n", "eligible_cities = city_counts[city_counts > 1500].index.tolist()" ] }, { "cell_type": "code", "execution_count": 134, "id": "b1861101-999f-4591-91b2-483a30ec0fad", "metadata": {}, "outputs": [], "source": [ "# Drop every row whose city is not in eligible_cities.\n", "# .copy() makes the result an independent frame, so the column assignments made in\n", "# later cells do not write into a slice of the old frame (avoids SettingWithCopyWarning).\n", "df = df[df['city'].isin(eligible_cities)].copy()" ] }, { "cell_type": "code", "execution_count": 135, "id": "5e4d1418-c184-48f1-8657-a693f0103920", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
match_idbatting_teambowling_teamballrunsplayer_dismissedcity
02South AfricaEngland0.100Durban
12South AfricaEngland0.210Durban
22South AfricaEngland0.300Durban
32South AfricaEngland0.400Durban
42South AfricaEngland0.500Durban
........................
5196411800AfghanistanBangladesh49.240Abu Dhabi
5196421800AfghanistanBangladesh49.340Abu Dhabi
5196431800AfghanistanBangladesh49.420Abu Dhabi
5196441800AfghanistanBangladesh49.540Abu Dhabi
5196451800AfghanistanBangladesh49.640Abu Dhabi
\n", "

276744 rows × 7 columns

\n", "
" ], "text/plain": [ " match_id batting_team bowling_team ball runs player_dismissed \\\n", "0 2 South Africa England 0.1 0 0 \n", "1 2 South Africa England 0.2 1 0 \n", "2 2 South Africa England 0.3 0 0 \n", "3 2 South Africa England 0.4 0 0 \n", "4 2 South Africa England 0.5 0 0 \n", "... ... ... ... ... ... ... \n", "519641 1800 Afghanistan Bangladesh 49.2 4 0 \n", "519642 1800 Afghanistan Bangladesh 49.3 4 0 \n", "519643 1800 Afghanistan Bangladesh 49.4 2 0 \n", "519644 1800 Afghanistan Bangladesh 49.5 4 0 \n", "519645 1800 Afghanistan Bangladesh 49.6 4 0 \n", "\n", " city \n", "0 Durban \n", "1 Durban \n", "2 Durban \n", "3 Durban \n", "4 Durban \n", "... ... \n", "519641 Abu Dhabi \n", "519642 Abu Dhabi \n", "519643 Abu Dhabi \n", "519644 Abu Dhabi \n", "519645 Abu Dhabi \n", "\n", "[276744 rows x 7 columns]" ] }, "execution_count": 135, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 136, "id": "2a726513-3b82-4efd-b48d-212a72f2b70e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 0\n", "1 1\n", "2 1\n", "3 1\n", "4 1\n", " ... \n", "519641 241\n", "519642 245\n", "519643 247\n", "519644 251\n", "519645 255\n", "Name: runs, Length: 276744, dtype: int64" ] }, "execution_count": 136, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#to find cumulative score\n", "df.groupby('match_id')['runs'].cumsum()\n" ] }, { "cell_type": "code", "execution_count": 137, "id": "79567475-df4d-40df-bb69-ed2855b68719", "metadata": {}, "outputs": [], "source": [ "df['current_score']=df.groupby('match_id')['runs'].cumsum()" ] }, { "cell_type": "code", "execution_count": 138, "id": "6c1332bc-c375-4f34-89fe-6c63b6be089f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
match_idbatting_teambowling_teamballrunsplayer_dismissedcitycurrent_score
02South AfricaEngland0.100Durban0
12South AfricaEngland0.210Durban1
22South AfricaEngland0.300Durban1
32South AfricaEngland0.400Durban1
42South AfricaEngland0.500Durban1
...........................
5196411800AfghanistanBangladesh49.240Abu Dhabi241
5196421800AfghanistanBangladesh49.340Abu Dhabi245
5196431800AfghanistanBangladesh49.420Abu Dhabi247
5196441800AfghanistanBangladesh49.540Abu Dhabi251
5196451800AfghanistanBangladesh49.640Abu Dhabi255
\n", "

276744 rows × 8 columns

\n", "
" ], "text/plain": [ " match_id batting_team bowling_team ball runs player_dismissed \\\n", "0 2 South Africa England 0.1 0 0 \n", "1 2 South Africa England 0.2 1 0 \n", "2 2 South Africa England 0.3 0 0 \n", "3 2 South Africa England 0.4 0 0 \n", "4 2 South Africa England 0.5 0 0 \n", "... ... ... ... ... ... ... \n", "519641 1800 Afghanistan Bangladesh 49.2 4 0 \n", "519642 1800 Afghanistan Bangladesh 49.3 4 0 \n", "519643 1800 Afghanistan Bangladesh 49.4 2 0 \n", "519644 1800 Afghanistan Bangladesh 49.5 4 0 \n", "519645 1800 Afghanistan Bangladesh 49.6 4 0 \n", "\n", " city current_score \n", "0 Durban 0 \n", "1 Durban 1 \n", "2 Durban 1 \n", "3 Durban 1 \n", "4 Durban 1 \n", "... ... ... \n", "519641 Abu Dhabi 241 \n", "519642 Abu Dhabi 245 \n", "519643 Abu Dhabi 247 \n", "519644 Abu Dhabi 251 \n", "519645 Abu Dhabi 255 \n", "\n", "[276744 rows x 8 columns]" ] }, "execution_count": 138, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 139, "id": "e45533b7-edb6-4ee9-b7f2-5bfe87e61d3e", "metadata": {}, "outputs": [], "source": [ "#decimal ke left me over aur right me ball ka number h unhe split krliya\n", "df['over']=df['ball'].apply(lambda x:str(x).split(\".\")[0])\n", "df['ball_no']=df['ball'].apply(lambda x:str(x).split(\".\")[1])" ] }, { "cell_type": "code", "execution_count": 140, "id": "4158bc9b-059d-4d61-85d8-db6d89cb8ee6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
match_idbatting_teambowling_teamballrunsplayer_dismissedcitycurrent_scoreoverball_no
02South AfricaEngland0.100Durban001
12South AfricaEngland0.210Durban102
22South AfricaEngland0.300Durban103
32South AfricaEngland0.400Durban104
42South AfricaEngland0.500Durban105
.................................
5196411800AfghanistanBangladesh49.240Abu Dhabi241492
5196421800AfghanistanBangladesh49.340Abu Dhabi245493
5196431800AfghanistanBangladesh49.420Abu Dhabi247494
5196441800AfghanistanBangladesh49.540Abu Dhabi251495
5196451800AfghanistanBangladesh49.640Abu Dhabi255496
\n", "

276744 rows × 10 columns

\n", "
" ], "text/plain": [ " match_id batting_team bowling_team ball runs player_dismissed \\\n", "0 2 South Africa England 0.1 0 0 \n", "1 2 South Africa England 0.2 1 0 \n", "2 2 South Africa England 0.3 0 0 \n", "3 2 South Africa England 0.4 0 0 \n", "4 2 South Africa England 0.5 0 0 \n", "... ... ... ... ... ... ... \n", "519641 1800 Afghanistan Bangladesh 49.2 4 0 \n", "519642 1800 Afghanistan Bangladesh 49.3 4 0 \n", "519643 1800 Afghanistan Bangladesh 49.4 2 0 \n", "519644 1800 Afghanistan Bangladesh 49.5 4 0 \n", "519645 1800 Afghanistan Bangladesh 49.6 4 0 \n", "\n", " city current_score over ball_no \n", "0 Durban 0 0 1 \n", "1 Durban 1 0 2 \n", "2 Durban 1 0 3 \n", "3 Durban 1 0 4 \n", "4 Durban 1 0 5 \n", "... ... ... ... ... \n", "519641 Abu Dhabi 241 49 2 \n", "519642 Abu Dhabi 245 49 3 \n", "519643 Abu Dhabi 247 49 4 \n", "519644 Abu Dhabi 251 49 5 \n", "519645 Abu Dhabi 255 49 6 \n", "\n", "[276744 rows x 10 columns]" ] }, "execution_count": 140, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 141, "id": "e64619c2-fde6-4522-b646-214c59f0c16b", "metadata": {}, "outputs": [], "source": [ "#balls kitni feki gyi\n", "df['balls_bowled']=(df['over'].astype('int')*6)+df['ball_no'].astype('int')" ] }, { "cell_type": "code", "execution_count": 142, "id": "343a4f6f-36d6-4832-891b-0ba248414899", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
match_idbatting_teambowling_teamballrunsplayer_dismissedcitycurrent_scoreoverball_noballs_bowled
02South AfricaEngland0.100Durban0011
12South AfricaEngland0.210Durban1022
22South AfricaEngland0.300Durban1033
32South AfricaEngland0.400Durban1044
42South AfricaEngland0.500Durban1055
....................................
5196411800AfghanistanBangladesh49.240Abu Dhabi241492296
5196421800AfghanistanBangladesh49.340Abu Dhabi245493297
5196431800AfghanistanBangladesh49.420Abu Dhabi247494298
5196441800AfghanistanBangladesh49.540Abu Dhabi251495299
5196451800AfghanistanBangladesh49.640Abu Dhabi255496300
\n", "

276744 rows × 11 columns

\n", "
" ], "text/plain": [ " match_id batting_team bowling_team ball runs player_dismissed \\\n", "0 2 South Africa England 0.1 0 0 \n", "1 2 South Africa England 0.2 1 0 \n", "2 2 South Africa England 0.3 0 0 \n", "3 2 South Africa England 0.4 0 0 \n", "4 2 South Africa England 0.5 0 0 \n", "... ... ... ... ... ... ... \n", "519641 1800 Afghanistan Bangladesh 49.2 4 0 \n", "519642 1800 Afghanistan Bangladesh 49.3 4 0 \n", "519643 1800 Afghanistan Bangladesh 49.4 2 0 \n", "519644 1800 Afghanistan Bangladesh 49.5 4 0 \n", "519645 1800 Afghanistan Bangladesh 49.6 4 0 \n", "\n", " city current_score over ball_no balls_bowled \n", "0 Durban 0 0 1 1 \n", "1 Durban 1 0 2 2 \n", "2 Durban 1 0 3 3 \n", "3 Durban 1 0 4 4 \n", "4 Durban 1 0 5 5 \n", "... ... ... ... ... ... \n", "519641 Abu Dhabi 241 49 2 296 \n", "519642 Abu Dhabi 245 49 3 297 \n", "519643 Abu Dhabi 247 49 4 298 \n", "519644 Abu Dhabi 251 49 5 299 \n", "519645 Abu Dhabi 255 49 6 300 \n", "\n", "[276744 rows x 11 columns]" ] }, "execution_count": 142, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 143, "id": "def21257-1472-466f-b69b-eace5b075a09", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
match_idbatting_teambowling_teamballrunsplayer_dismissedcitycurrent_scoreoverball_noballs_bowledballs_left
02South AfricaEngland0.100Durban0011299
12South AfricaEngland0.210Durban1022298
22South AfricaEngland0.300Durban1033297
32South AfricaEngland0.400Durban1044296
42South AfricaEngland0.500Durban1055295
.......................................
5196411800AfghanistanBangladesh49.240Abu Dhabi2414922964
5196421800AfghanistanBangladesh49.340Abu Dhabi2454932973
5196431800AfghanistanBangladesh49.420Abu Dhabi2474942982
5196441800AfghanistanBangladesh49.540Abu Dhabi2514952991
5196451800AfghanistanBangladesh49.640Abu Dhabi2554963000
\n", "

276744 rows × 12 columns

\n", "
" ], "text/plain": [ " match_id batting_team bowling_team ball runs player_dismissed \\\n", "0 2 South Africa England 0.1 0 0 \n", "1 2 South Africa England 0.2 1 0 \n", "2 2 South Africa England 0.3 0 0 \n", "3 2 South Africa England 0.4 0 0 \n", "4 2 South Africa England 0.5 0 0 \n", "... ... ... ... ... ... ... \n", "519641 1800 Afghanistan Bangladesh 49.2 4 0 \n", "519642 1800 Afghanistan Bangladesh 49.3 4 0 \n", "519643 1800 Afghanistan Bangladesh 49.4 2 0 \n", "519644 1800 Afghanistan Bangladesh 49.5 4 0 \n", "519645 1800 Afghanistan Bangladesh 49.6 4 0 \n", "\n", " city current_score over ball_no balls_bowled balls_left \n", "0 Durban 0 0 1 1 299 \n", "1 Durban 1 0 2 2 298 \n", "2 Durban 1 0 3 3 297 \n", "3 Durban 1 0 4 4 296 \n", "4 Durban 1 0 5 5 295 \n", "... ... ... ... ... ... ... \n", "519641 Abu Dhabi 241 49 2 296 4 \n", "519642 Abu Dhabi 245 49 3 297 3 \n", "519643 Abu Dhabi 247 49 4 298 2 \n", "519644 Abu Dhabi 251 49 5 299 1 \n", "519645 Abu Dhabi 255 49 6 300 0 \n", "\n", "[276744 rows x 12 columns]" ] }, "execution_count": 143, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Balls remaining out of 300 (presumably a 50-over innings - confirm against the data source).\n", "# clip(lower=0) turns any negative remainder into 0, i.e. rows where balls_bowled exceeds 300.\n", "df['balls_left'] = (300 - df['balls_bowled']).clip(lower=0)\n", "df" ] }, { "cell_type": "code", "execution_count": 144, "id": "829893c2-629f-41ec-9243-f2075757ec19", "metadata": {}, "outputs": [], "source": [ "# player_dismissed holds 0 when no player was dismissed on that ball, otherwise the player's name\n", "# to derive wickets, the names are replaced with 1 and a per-match cumulative sum is taken" ] }, { "cell_type": "code", "execution_count": 145, "id": "67053ddc-bfa6-46bb-990f-ed9bd65d495c", "metadata": {}, "outputs": [], "source": [ "# Flag every ball on which a wicket fell: the string '0' means no dismissal, anything else counts as 1.\n", "# NOTE: this cell overwrites player_dismissed in place, so run it only once per kernel session -\n", "# on a second run the column no longer contains '0' strings and every row would be flagged.\n", "df['player_dismissed'] = (df['player_dismissed'] != '0').astype('int')\n",
"# Running total of wickets within each match, also exposed under the name 'wickets'.\n", "df['player_dismissed'] = df.groupby('match_id')['player_dismissed'].cumsum()\n", "df['wickets'] = df['player_dismissed']\n" ] }, { "cell_type": "code", "execution_count": 146, "id": "01297336-7805-4772-b8d1-c0528d2b3d69", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
match_idbatting_teambowling_teamballrunsplayer_dismissedcitycurrent_scoreoverball_noballs_bowledballs_leftwickets
02South AfricaEngland0.100Durban00112990
12South AfricaEngland0.210Durban10222980
22South AfricaEngland0.300Durban10332970
32South AfricaEngland0.400Durban10442960
42South AfricaEngland0.500Durban10552950
..........................................
5196411800AfghanistanBangladesh49.247Abu Dhabi24149229647
5196421800AfghanistanBangladesh49.347Abu Dhabi24549329737
5196431800AfghanistanBangladesh49.427Abu Dhabi24749429827
5196441800AfghanistanBangladesh49.547Abu Dhabi25149529917
5196451800AfghanistanBangladesh49.647Abu Dhabi25549630007
\n", "

276744 rows × 13 columns

\n", "
" ], "text/plain": [ " match_id batting_team bowling_team ball runs player_dismissed \\\n", "0 2 South Africa England 0.1 0 0 \n", "1 2 South Africa England 0.2 1 0 \n", "2 2 South Africa England 0.3 0 0 \n", "3 2 South Africa England 0.4 0 0 \n", "4 2 South Africa England 0.5 0 0 \n", "... ... ... ... ... ... ... \n", "519641 1800 Afghanistan Bangladesh 49.2 4 7 \n", "519642 1800 Afghanistan Bangladesh 49.3 4 7 \n", "519643 1800 Afghanistan Bangladesh 49.4 2 7 \n", "519644 1800 Afghanistan Bangladesh 49.5 4 7 \n", "519645 1800 Afghanistan Bangladesh 49.6 4 7 \n", "\n", " city current_score over ball_no balls_bowled balls_left \\\n", "0 Durban 0 0 1 1 299 \n", "1 Durban 1 0 2 2 298 \n", "2 Durban 1 0 3 3 297 \n", "3 Durban 1 0 4 4 296 \n", "4 Durban 1 0 5 5 295 \n", "... ... ... ... ... ... ... \n", "519641 Abu Dhabi 241 49 2 296 4 \n", "519642 Abu Dhabi 245 49 3 297 3 \n", "519643 Abu Dhabi 247 49 4 298 2 \n", "519644 Abu Dhabi 251 49 5 299 1 \n", "519645 Abu Dhabi 255 49 6 300 0 \n", "\n", " wickets \n", "0 0 \n", "1 0 \n", "2 0 \n", "3 0 \n", "4 0 \n", "... ... \n", "519641 7 \n", "519642 7 \n", "519643 7 \n", "519644 7 \n", "519645 7 \n", "\n", "[276744 rows x 13 columns]" ] }, "execution_count": 146, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 147, "id": "dc770c76-984b-4333-b8f0-515c28be6af1", "metadata": {}, "outputs": [], "source": [ "#to find current run rate\n", "df['crr'] = (df['current_score']*6)/df['balls_bowled']" ] }, { "cell_type": "code", "execution_count": 148, "id": "69250573-f2b2-4fec-99ea-b46fa114ac29", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
match_idbatting_teambowling_teamballrunsplayer_dismissedcitycurrent_scoreoverball_noballs_bowledballs_leftwicketscrr
02South AfricaEngland0.100Durban001129900.000000
12South AfricaEngland0.210Durban102229803.000000
22South AfricaEngland0.300Durban103329702.000000
32South AfricaEngland0.400Durban104429601.500000
42South AfricaEngland0.500Durban105529501.200000
.............................................
5196411800AfghanistanBangladesh49.247Abu Dhabi241492296474.885135
5196421800AfghanistanBangladesh49.347Abu Dhabi245493297374.949495
5196431800AfghanistanBangladesh49.427Abu Dhabi247494298274.973154
5196441800AfghanistanBangladesh49.547Abu Dhabi251495299175.036789
5196451800AfghanistanBangladesh49.647Abu Dhabi255496300075.100000
\n", "

276744 rows × 14 columns

\n", "
" ], "text/plain": [ " match_id batting_team bowling_team ball runs player_dismissed \\\n", "0 2 South Africa England 0.1 0 0 \n", "1 2 South Africa England 0.2 1 0 \n", "2 2 South Africa England 0.3 0 0 \n", "3 2 South Africa England 0.4 0 0 \n", "4 2 South Africa England 0.5 0 0 \n", "... ... ... ... ... ... ... \n", "519641 1800 Afghanistan Bangladesh 49.2 4 7 \n", "519642 1800 Afghanistan Bangladesh 49.3 4 7 \n", "519643 1800 Afghanistan Bangladesh 49.4 2 7 \n", "519644 1800 Afghanistan Bangladesh 49.5 4 7 \n", "519645 1800 Afghanistan Bangladesh 49.6 4 7 \n", "\n", " city current_score over ball_no balls_bowled balls_left \\\n", "0 Durban 0 0 1 1 299 \n", "1 Durban 1 0 2 2 298 \n", "2 Durban 1 0 3 3 297 \n", "3 Durban 1 0 4 4 296 \n", "4 Durban 1 0 5 5 295 \n", "... ... ... ... ... ... ... \n", "519641 Abu Dhabi 241 49 2 296 4 \n", "519642 Abu Dhabi 245 49 3 297 3 \n", "519643 Abu Dhabi 247 49 4 298 2 \n", "519644 Abu Dhabi 251 49 5 299 1 \n", "519645 Abu Dhabi 255 49 6 300 0 \n", "\n", " wickets crr \n", "0 0 0.000000 \n", "1 0 3.000000 \n", "2 0 2.000000 \n", "3 0 1.500000 \n", "4 0 1.200000 \n", "... ... ... \n", "519641 7 4.885135 \n", "519642 7 4.949495 \n", "519643 7 4.973154 \n", "519644 7 5.036789 \n", "519645 7 5.100000 \n", "\n", "[276744 rows x 14 columns]" ] }, "execution_count": 148, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 149, "id": "82e14e6f-9531-4e15-93f0-42c37e8068d4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " match_id last_five_runs\n", "0 2 NaN\n", "1 2 NaN\n", "2 2 NaN\n", "3 2 NaN\n", "4 2 NaN\n", "... ... 
...\n", "276739 1800 45.0\n", "276740 1800 48.0\n", "276741 1800 49.0\n", "276742 1800 53.0\n", "276743 1800 57.0\n", "\n", "[276744 rows x 2 columns]\n" ] } ], "source": [ "import pandas as pd\n", "\n", "# Runs scored over the previous 30 balls (5 overs * 6 balls per over) of the same match\n", "rolling_window = 30\n", "\n", "# The rolling sum is taken per match_id, so a window never spans two matches.\n", "# transform() returns a Series on df's own index. That matters because df's index has\n", "# gaps (276744 rows, labels up to 519645): a frame rebuilt with a fresh 0..n-1 index\n", "# would be aligned to the wrong rows when assigned back to df.\n", "last_five_df = df[['match_id']].assign(\n", "    last_five_runs=df.groupby('match_id')['runs'].transform(\n", "        lambda match_runs: match_runs.rolling(rolling_window).sum()\n", "    )\n", ")\n", "\n", "# The first rolling_window - 1 balls of every match are NaN (window not yet full)\n", "print(last_five_df)\n" ] }, { "cell_type": "code", "execution_count": 150, "id": "3165963a-2f87-48ce-8934-d74ac9473280", "metadata": {}, "outputs": [], "source": [ "# last_five_df shares df's index, so this assignment lines up row for row\n", "df['last_five_runs'] = last_five_df['last_five_runs']" ] }, { "cell_type": "code", "execution_count": 151, "id": "d33b699f-b877-485d-8e2e-9d4a765b09fa", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
match_idbatting_teambowling_teamballrunsplayer_dismissedcitycurrent_scoreoverball_noballs_bowledballs_leftwicketscrrlast_five_runs
02South AfricaEngland0.100Durban001129900.000000NaN
12South AfricaEngland0.210Durban102229803.000000NaN
22South AfricaEngland0.300Durban103329702.000000NaN
32South AfricaEngland0.400Durban104429601.500000NaN
42South AfricaEngland0.500Durban105529501.200000NaN
................................................
5954New ZealandEngland18.424Christchurch6518411218843.48214321.0
5964New ZealandEngland18.514Christchurch6618511318743.50442521.0
5974New ZealandEngland18.614Christchurch6718611418643.52631622.0
5984New ZealandEngland19.104Christchurch6719111518543.49565222.0
5994New ZealandEngland19.214Christchurch6819211618443.51724122.0
\n", "

600 rows × 15 columns

\n", "
" ], "text/plain": [ " match_id batting_team bowling_team ball runs player_dismissed \\\n", "0 2 South Africa England 0.1 0 0 \n", "1 2 South Africa England 0.2 1 0 \n", "2 2 South Africa England 0.3 0 0 \n", "3 2 South Africa England 0.4 0 0 \n", "4 2 South Africa England 0.5 0 0 \n", ".. ... ... ... ... ... ... \n", "595 4 New Zealand England 18.4 2 4 \n", "596 4 New Zealand England 18.5 1 4 \n", "597 4 New Zealand England 18.6 1 4 \n", "598 4 New Zealand England 19.1 0 4 \n", "599 4 New Zealand England 19.2 1 4 \n", "\n", " city current_score over ball_no balls_bowled balls_left \\\n", "0 Durban 0 0 1 1 299 \n", "1 Durban 1 0 2 2 298 \n", "2 Durban 1 0 3 3 297 \n", "3 Durban 1 0 4 4 296 \n", "4 Durban 1 0 5 5 295 \n", ".. ... ... ... ... ... ... \n", "595 Christchurch 65 18 4 112 188 \n", "596 Christchurch 66 18 5 113 187 \n", "597 Christchurch 67 18 6 114 186 \n", "598 Christchurch 67 19 1 115 185 \n", "599 Christchurch 68 19 2 116 184 \n", "\n", " wickets crr last_five_runs \n", "0 0 0.000000 NaN \n", "1 0 3.000000 NaN \n", "2 0 2.000000 NaN \n", "3 0 1.500000 NaN \n", "4 0 1.200000 NaN \n", ".. ... ... ... 
\n", "595 4 3.482143 21.0 \n", "596 4 3.504425 21.0 \n", "597 4 3.526316 22.0 \n", "598 4 3.495652 22.0 \n", "599 4 3.517241 22.0 \n", "\n", "[600 rows x 15 columns]" ] }, "execution_count": 151, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(600)" ] }, { "cell_type": "code", "execution_count": 152, "id": "37e3a02b-442a-49a5-8789-85abc7aa1b36", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(276744, 15)" ] }, "execution_count": 152, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape" ] }, { "cell_type": "code", "execution_count": 153, "id": "af8f4a0e-6b57-437b-8afb-9936aae863d0", "metadata": {}, "outputs": [], "source": [ "# import pandas as pd\n", "\n", "# # Assuming you have a DataFrame 'df' with columns 'match_id' and 'wickets' containing ball-by-ball data\n", "\n", "# # Define a rolling window of 30 balls (5 overs * 6 balls per over)\n", "# rolling_window = 30\n", "\n", "# # Create a new DataFrame to store the results\n", "# last_five_wickdf = pd.DataFrame(columns=['match_id', 'last_five_wickets'])\n", "\n", "# # Iterate through unique match IDs\n", "# match_ids = df['match_id'].unique()\n", "# for match_id in match_ids:\n", "# match_data = df[df['match_id'] == match_id].copy() # Create a copy of the slice\n", "# match_data['last_five_wickets'] = match_data['wickets'].rolling(rolling_window).max()\n", "# last_five_wickdf = pd.concat([last_five_wickdf, match_data[['match_id', 'last_five_wickets']]], ignore_index=True)\n", "\n", "# # Print or use the 'last_five_wickdf' DataFrame containing match_id and last_five_wickets\n", "# print(last_five_wickdf)" ] }, { "cell_type": "code", "execution_count": 154, "id": "492adf71-b528-4125-a8dd-5f30f6dd5c9c", "metadata": {}, "outputs": [], "source": [ "# import pandas as pd\n", "\n", "# # Create the source DataFrame\n", "# source_df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})\n", "\n", "# # Create the destination DataFrame\n", "# destination_df = 
pd.DataFrame({'X': [7, 8, 9]})\n", "\n", "# # Copy the 'A' column from source_df to destination_df using the copy method\n", "# destination_df['A'] = source_df['A'].copy()\n", "\n", "# # Now destination_df will have the 'A' column copied from source_df\n", "# print(destination_df)" ] }, { "cell_type": "code", "execution_count": 155, "id": "80a6c7cb-5d06-4192-ba46-099d31942052", "metadata": {}, "outputs": [], "source": [ "# df['last_five_wickets']=last_five_wickdf['last_five_wickets'].copy\n" ] }, { "cell_type": "code", "execution_count": 156, "id": "493bfbef-205f-4372-8246-a9a4b5210b10", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
match_idbatting_teambowling_teamballrunsplayer_dismissedcitycurrent_scoreoverball_noballs_bowledballs_leftwicketscrrlast_five_runs
02South AfricaEngland0.100Durban001129900.000000NaN
12South AfricaEngland0.210Durban102229803.000000NaN
22South AfricaEngland0.300Durban103329702.000000NaN
32South AfricaEngland0.400Durban104429601.500000NaN
42South AfricaEngland0.500Durban105529501.200000NaN
................................................
5954New ZealandEngland18.424Christchurch6518411218843.48214321.0
5964New ZealandEngland18.514Christchurch6618511318743.50442521.0
5974New ZealandEngland18.614Christchurch6718611418643.52631622.0
5984New ZealandEngland19.104Christchurch6719111518543.49565222.0
5994New ZealandEngland19.214Christchurch6819211618443.51724122.0
\n", "

600 rows × 15 columns

\n", "
" ], "text/plain": [ " match_id batting_team bowling_team ball runs player_dismissed \\\n", "0 2 South Africa England 0.1 0 0 \n", "1 2 South Africa England 0.2 1 0 \n", "2 2 South Africa England 0.3 0 0 \n", "3 2 South Africa England 0.4 0 0 \n", "4 2 South Africa England 0.5 0 0 \n", ".. ... ... ... ... ... ... \n", "595 4 New Zealand England 18.4 2 4 \n", "596 4 New Zealand England 18.5 1 4 \n", "597 4 New Zealand England 18.6 1 4 \n", "598 4 New Zealand England 19.1 0 4 \n", "599 4 New Zealand England 19.2 1 4 \n", "\n", " city current_score over ball_no balls_bowled balls_left \\\n", "0 Durban 0 0 1 1 299 \n", "1 Durban 1 0 2 2 298 \n", "2 Durban 1 0 3 3 297 \n", "3 Durban 1 0 4 4 296 \n", "4 Durban 1 0 5 5 295 \n", ".. ... ... ... ... ... ... \n", "595 Christchurch 65 18 4 112 188 \n", "596 Christchurch 66 18 5 113 187 \n", "597 Christchurch 67 18 6 114 186 \n", "598 Christchurch 67 19 1 115 185 \n", "599 Christchurch 68 19 2 116 184 \n", "\n", " wickets crr last_five_runs \n", "0 0 0.000000 NaN \n", "1 0 3.000000 NaN \n", "2 0 2.000000 NaN \n", "3 0 1.500000 NaN \n", "4 0 1.200000 NaN \n", ".. ... ... ... 
\n", "595 4 3.482143 21.0 \n", "596 4 3.504425 21.0 \n", "597 4 3.526316 22.0 \n", "598 4 3.495652 22.0 \n", "599 4 3.517241 22.0 \n", "\n", "[600 rows x 15 columns]" ] }, "execution_count": 156, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(600)" ] }, { "cell_type": "code", "execution_count": 157, "id": "d911e841-b6e4-4650-9c4f-7dd9d2a1032d", "metadata": {}, "outputs": [], "source": [ "#In every match the first 29 values of last_five_runs are missing (NaN), because the 30-ball rolling window is only full from the 30th ball onwards" ] }, { "cell_type": "code", "execution_count": 158, "id": "d8c47333-7d68-4dfb-8f18-c92437a26310", "metadata": {}, "outputs": [], "source": [ "#df = df.merge(last_five_df[['match_id', 'last_five_runs']], on='match_id', how ='inner')\n", "#Note: the disabled merge above uses how='inner', not how='left'.\n", "#A how='left' merge keeps all the rows from the left DataFrame (df) \n", "#and attaches the rows from the right DataFrame (last_five_df) that have the same 'match_id'; left rows without a match get NaN. \n", "#Both frames hold one row per ball, so joining on 'match_id' alone would pair every ball of a match with every ball of that match." ] }, { "cell_type": "code", "execution_count": 159, "id": "98702d30-8efc-4c1d-b0fb-d0d4d7fbdcfb", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
match_idbatting_teambowling_teamballrunsplayer_dismissedcitycurrent_scoreoverball_noballs_bowledballs_leftwicketscrrlast_five_runs
02South AfricaEngland0.100Durban001129900.000000NaN
12South AfricaEngland0.210Durban102229803.000000NaN
22South AfricaEngland0.300Durban103329702.000000NaN
32South AfricaEngland0.400Durban104429601.500000NaN
42South AfricaEngland0.500Durban105529501.200000NaN
................................................
5196411800AfghanistanBangladesh49.247Abu Dhabi241492296474.885135NaN
5196421800AfghanistanBangladesh49.347Abu Dhabi245493297374.949495NaN
5196431800AfghanistanBangladesh49.427Abu Dhabi247494298274.973154NaN
5196441800AfghanistanBangladesh49.547Abu Dhabi251495299175.036789NaN
5196451800AfghanistanBangladesh49.647Abu Dhabi255496300075.100000NaN
\n", "

276744 rows × 15 columns

\n", "
" ], "text/plain": [ " match_id batting_team bowling_team ball runs player_dismissed \\\n", "0 2 South Africa England 0.1 0 0 \n", "1 2 South Africa England 0.2 1 0 \n", "2 2 South Africa England 0.3 0 0 \n", "3 2 South Africa England 0.4 0 0 \n", "4 2 South Africa England 0.5 0 0 \n", "... ... ... ... ... ... ... \n", "519641 1800 Afghanistan Bangladesh 49.2 4 7 \n", "519642 1800 Afghanistan Bangladesh 49.3 4 7 \n", "519643 1800 Afghanistan Bangladesh 49.4 2 7 \n", "519644 1800 Afghanistan Bangladesh 49.5 4 7 \n", "519645 1800 Afghanistan Bangladesh 49.6 4 7 \n", "\n", " city current_score over ball_no balls_bowled balls_left \\\n", "0 Durban 0 0 1 1 299 \n", "1 Durban 1 0 2 2 298 \n", "2 Durban 1 0 3 3 297 \n", "3 Durban 1 0 4 4 296 \n", "4 Durban 1 0 5 5 295 \n", "... ... ... ... ... ... ... \n", "519641 Abu Dhabi 241 49 2 296 4 \n", "519642 Abu Dhabi 245 49 3 297 3 \n", "519643 Abu Dhabi 247 49 4 298 2 \n", "519644 Abu Dhabi 251 49 5 299 1 \n", "519645 Abu Dhabi 255 49 6 300 0 \n", "\n", " wickets crr last_five_runs \n", "0 0 0.000000 NaN \n", "1 0 3.000000 NaN \n", "2 0 2.000000 NaN \n", "3 0 1.500000 NaN \n", "4 0 1.200000 NaN \n", "... ... ... ... 
\n", "519641 7 4.885135 NaN \n", "519642 7 4.949495 NaN \n", "519643 7 4.973154 NaN \n", "519644 7 5.036789 NaN \n", "519645 7 5.100000 NaN \n", "\n", "[276744 rows x 15 columns]" ] }, "execution_count": 159, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 160, "id": "c280e39f-a3d3-431a-a15b-94d6dd93861c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(276744, 15)" ] }, "execution_count": 160, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape" ] }, { "cell_type": "code", "execution_count": 161, "id": "a75e7690-9111-4b8c-931a-a5bdad82ae66", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " match_id total_score\n", "0 2 211\n", "1 3 131\n", "2 4 223\n", "3 5 257\n", "4 6 229\n", ".. ... ...\n", "947 1791 408\n", "948 1792 217\n", "949 1793 221\n", "950 1794 301\n", "951 1800 255\n", "\n", "[952 rows x 2 columns]\n" ] } ], "source": [ "import pandas as pd\n", "\n", "# Make sure the per-ball 'runs' column is numeric; values that cannot be parsed become NaN\n", "df['runs'] = pd.to_numeric(df['runs'], errors='coerce')\n", "\n", "# Total score of each match: the sum of its per-ball runs, one row per match_id\n", "total_scores = df.groupby('match_id')['runs'].sum().rename('total_score').reset_index()\n", "\n", "print(total_scores)\n" ] }, { "cell_type": "code", "execution_count": 162, "id": "8e73df71-ad11-4a50-bcf8-00736b097997", "metadata": {}, "outputs": [], "source": [ "# Attach each match's total to every ball of that match\n", "df = pd.merge(df, total_scores[['match_id', 'total_score']], how='inner', on='match_id')" ] }, { "cell_type": "code", "execution_count": 163, "id": "6d97bf54-e58c-4f8f-a479-d65ec2e6b01e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
match_idbatting_teambowling_teamballrunsplayer_dismissedcitycurrent_scoreoverball_noballs_bowledballs_leftwicketscrrlast_five_runstotal_score
02South AfricaEngland0.100Durban001129900.000000NaN211
12South AfricaEngland0.210Durban102229803.000000NaN211
22South AfricaEngland0.300Durban103329702.000000NaN211
32South AfricaEngland0.400Durban104429601.500000NaN211
42South AfricaEngland0.500Durban105529501.200000NaN211
...................................................
5954New ZealandEngland18.424Christchurch6518411218843.48214321.0223
5964New ZealandEngland18.514Christchurch6618511318743.50442521.0223
5974New ZealandEngland18.614Christchurch6718611418643.52631622.0223
5984New ZealandEngland19.104Christchurch6719111518543.49565222.0223
5994New ZealandEngland19.214Christchurch6819211618443.51724122.0223
\n", "

600 rows × 16 columns

\n", "
" ], "text/plain": [ " match_id batting_team bowling_team ball runs player_dismissed \\\n", "0 2 South Africa England 0.1 0 0 \n", "1 2 South Africa England 0.2 1 0 \n", "2 2 South Africa England 0.3 0 0 \n", "3 2 South Africa England 0.4 0 0 \n", "4 2 South Africa England 0.5 0 0 \n", ".. ... ... ... ... ... ... \n", "595 4 New Zealand England 18.4 2 4 \n", "596 4 New Zealand England 18.5 1 4 \n", "597 4 New Zealand England 18.6 1 4 \n", "598 4 New Zealand England 19.1 0 4 \n", "599 4 New Zealand England 19.2 1 4 \n", "\n", " city current_score over ball_no balls_bowled balls_left \\\n", "0 Durban 0 0 1 1 299 \n", "1 Durban 1 0 2 2 298 \n", "2 Durban 1 0 3 3 297 \n", "3 Durban 1 0 4 4 296 \n", "4 Durban 1 0 5 5 295 \n", ".. ... ... ... ... ... ... \n", "595 Christchurch 65 18 4 112 188 \n", "596 Christchurch 66 18 5 113 187 \n", "597 Christchurch 67 18 6 114 186 \n", "598 Christchurch 67 19 1 115 185 \n", "599 Christchurch 68 19 2 116 184 \n", "\n", " wickets crr last_five_runs total_score \n", "0 0 0.000000 NaN 211 \n", "1 0 3.000000 NaN 211 \n", "2 0 2.000000 NaN 211 \n", "3 0 1.500000 NaN 211 \n", "4 0 1.200000 NaN 211 \n", ".. ... ... ... ... \n", "595 4 3.482143 21.0 223 \n", "596 4 3.504425 21.0 223 \n", "597 4 3.526316 22.0 223 \n", "598 4 3.495652 22.0 223 \n", "599 4 3.517241 22.0 223 \n", "\n", "[600 rows x 16 columns]" ] }, "execution_count": 163, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(600)" ] }, { "cell_type": "code", "execution_count": 164, "id": "a3bbb0f6-b488-4645-b217-3ec10ecfbb54", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(276744, 16)" ] }, "execution_count": 164, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape" ] }, { "cell_type": "code", "execution_count": 165, "id": "a51b9cb8-82ec-44da-b457-3c0f82c5d9a7", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
match_idbatting_teambowling_teamballrunsplayer_dismissedcitycurrent_scoreoverball_noballs_bowledballs_leftwicketscrrlast_five_runstotal_score
02South AfricaEngland0.100Durban001129900.000000NaN211
12South AfricaEngland0.210Durban102229803.000000NaN211
22South AfricaEngland0.300Durban103329702.000000NaN211
32South AfricaEngland0.400Durban104429601.500000NaN211
42South AfricaEngland0.500Durban105529501.200000NaN211
...................................................
169511AustraliaEngland20.412Manchester10620412417625.12903217.0315
169611AustraliaEngland20.502Manchester10620512517525.08800017.0315
169711AustraliaEngland20.602Manchester10620612617425.04761918.0315
169811AustraliaEngland21.142Manchester11021112717325.19685014.0315
169911AustraliaEngland21.212Manchester11121212817225.20312514.0315
\n", "

1700 rows × 16 columns

\n", "
" ], "text/plain": [ " match_id batting_team bowling_team ball runs player_dismissed \\\n", "0 2 South Africa England 0.1 0 0 \n", "1 2 South Africa England 0.2 1 0 \n", "2 2 South Africa England 0.3 0 0 \n", "3 2 South Africa England 0.4 0 0 \n", "4 2 South Africa England 0.5 0 0 \n", "... ... ... ... ... ... ... \n", "1695 11 Australia England 20.4 1 2 \n", "1696 11 Australia England 20.5 0 2 \n", "1697 11 Australia England 20.6 0 2 \n", "1698 11 Australia England 21.1 4 2 \n", "1699 11 Australia England 21.2 1 2 \n", "\n", " city current_score over ball_no balls_bowled balls_left \\\n", "0 Durban 0 0 1 1 299 \n", "1 Durban 1 0 2 2 298 \n", "2 Durban 1 0 3 3 297 \n", "3 Durban 1 0 4 4 296 \n", "4 Durban 1 0 5 5 295 \n", "... ... ... ... ... ... ... \n", "1695 Manchester 106 20 4 124 176 \n", "1696 Manchester 106 20 5 125 175 \n", "1697 Manchester 106 20 6 126 174 \n", "1698 Manchester 110 21 1 127 173 \n", "1699 Manchester 111 21 2 128 172 \n", "\n", " wickets crr last_five_runs total_score \n", "0 0 0.000000 NaN 211 \n", "1 0 3.000000 NaN 211 \n", "2 0 2.000000 NaN 211 \n", "3 0 1.500000 NaN 211 \n", "4 0 1.200000 NaN 211 \n", "... ... ... ... ... 
\n", "1695 2 5.129032 17.0 315 \n", "1696 2 5.088000 17.0 315 \n", "1697 2 5.047619 18.0 315 \n", "1698 2 5.196850 14.0 315 \n", "1699 2 5.203125 14.0 315 \n", "\n", "[1700 rows x 16 columns]" ] }, "execution_count": 165, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(1700)" ] }, { "cell_type": "code", "execution_count": 179, "id": "04e19c08-8710-4e4a-a633-3fb46608caa6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['match_id', 'batting_team', 'bowling_team', 'overs', 'current_score',\n", " 'balls_left', 'wickets', 'crr', 'last_five_runs', 'total_score'],\n", " dtype='object')" ] }, "execution_count": 179, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.columns\n" ] }, { "cell_type": "code", "execution_count": 166, "id": "b8a69dca-d4fe-48cb-bff5-02cd09163832", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " match_id batting_team bowling_team ball runs player_dismissed \\\n", "0 2 South Africa England 0.1 0 0 \n", "1 2 South Africa England 0.2 1 0 \n", "2 2 South Africa England 0.3 0 0 \n", "3 2 South Africa England 0.4 0 0 \n", "4 2 South Africa England 0.5 0 0 \n", ".. ... ... ... ... ... ... \n", "280 2 South Africa England 45.5 0 9 \n", "281 2 South Africa England 45.6 1 9 \n", "282 2 South Africa England 46.1 0 9 \n", "283 2 South Africa England 46.2 1 9 \n", "284 2 South Africa England 46.3 0 10 \n", "\n", " city current_score over ball_no balls_bowled balls_left wickets \\\n", "0 Durban 0 0 1 1 299 0 \n", "1 Durban 1 0 2 2 298 0 \n", "2 Durban 1 0 3 3 297 0 \n", "3 Durban 1 0 4 4 296 0 \n", "4 Durban 1 0 5 5 295 0 \n", ".. ... ... ... ... ... ... ... 
\n", "280 Durban 209 45 5 275 25 9 \n", "281 Durban 210 45 6 276 24 9 \n", "282 Durban 210 46 1 277 23 9 \n", "283 Durban 211 46 2 278 22 9 \n", "284 Durban 211 46 3 279 21 10 \n", "\n", " crr last_five_runs total_score \n", "0 0.000000 NaN 211 \n", "1 3.000000 NaN 211 \n", "2 2.000000 NaN 211 \n", "3 1.500000 NaN 211 \n", "4 1.200000 NaN 211 \n", ".. ... ... ... \n", "280 4.560000 35.0 211 \n", "281 4.565217 32.0 211 \n", "282 4.548736 32.0 211 \n", "283 4.553957 32.0 211 \n", "284 4.537634 32.0 211 \n", "\n", "[285 rows x 16 columns]\n" ] } ], "source": [ "match_id_2_rows = df[df['match_id'] == 2]\n", "\n", "# Print all rows with match ID 2\n", "print(match_id_2_rows.head(600))" ] }, { "cell_type": "code", "execution_count": 167, "id": "de971955-142d-4fb4-ac34-afc696b3ab89", "metadata": {}, "outputs": [], "source": [ "import warnings\n", "warnings.filterwarnings('ignore')" ] }, { "cell_type": "code", "execution_count": 168, "id": "13ae4947-f4ea-4906-9b0f-d637e9f72e80", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
match_idbatting_teambowling_teamballrunsplayer_dismissedcitycurrent_scoreoverball_noballs_bowledballs_leftwicketscrrlast_five_runstotal_score
02South AfricaEngland0.100Durban001129900.000000NaN211
12South AfricaEngland0.210Durban102229803.000000NaN211
22South AfricaEngland0.300Durban103329702.000000NaN211
32South AfricaEngland0.400Durban104429601.500000NaN211
42South AfricaEngland0.500Durban105529501.200000NaN211
...................................................
169511AustraliaEngland20.412Manchester10620412417625.12903217.0315
169611AustraliaEngland20.502Manchester10620512517525.08800017.0315
169711AustraliaEngland20.602Manchester10620612617425.04761918.0315
169811AustraliaEngland21.142Manchester11021112717325.19685014.0315
169911AustraliaEngland21.212Manchester11121212817225.20312514.0315
\n", "

1700 rows × 16 columns

\n", "
" ], "text/plain": [ " match_id batting_team bowling_team ball runs player_dismissed \\\n", "0 2 South Africa England 0.1 0 0 \n", "1 2 South Africa England 0.2 1 0 \n", "2 2 South Africa England 0.3 0 0 \n", "3 2 South Africa England 0.4 0 0 \n", "4 2 South Africa England 0.5 0 0 \n", "... ... ... ... ... ... ... \n", "1695 11 Australia England 20.4 1 2 \n", "1696 11 Australia England 20.5 0 2 \n", "1697 11 Australia England 20.6 0 2 \n", "1698 11 Australia England 21.1 4 2 \n", "1699 11 Australia England 21.2 1 2 \n", "\n", " city current_score over ball_no balls_bowled balls_left \\\n", "0 Durban 0 0 1 1 299 \n", "1 Durban 1 0 2 2 298 \n", "2 Durban 1 0 3 3 297 \n", "3 Durban 1 0 4 4 296 \n", "4 Durban 1 0 5 5 295 \n", "... ... ... ... ... ... ... \n", "1695 Manchester 106 20 4 124 176 \n", "1696 Manchester 106 20 5 125 175 \n", "1697 Manchester 106 20 6 126 174 \n", "1698 Manchester 110 21 1 127 173 \n", "1699 Manchester 111 21 2 128 172 \n", "\n", " wickets crr last_five_runs total_score \n", "0 0 0.000000 NaN 211 \n", "1 0 3.000000 NaN 211 \n", "2 0 2.000000 NaN 211 \n", "3 0 1.500000 NaN 211 \n", "4 0 1.200000 NaN 211 \n", "... ... ... ... ... \n", "1695 2 5.129032 17.0 315 \n", "1696 2 5.088000 17.0 315 \n", "1697 2 5.047619 18.0 315 \n", "1698 2 5.196850 14.0 315 \n", "1699 2 5.203125 14.0 315 \n", "\n", "[1700 rows x 16 columns]" ] }, "execution_count": 168, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(1700)" ] }, { "cell_type": "code", "execution_count": 169, "id": "47a61d76-dc7c-4458-842e-5db79998eda0", "metadata": {}, "outputs": [], "source": [ "df['last_five_runs'].fillna(0, inplace=True)" ] }, { "cell_type": "code", "execution_count": 170, "id": "6f22b917-316c-4d0c-a5cf-362faa431a67", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
match_idbatting_teambowling_teamballrunsplayer_dismissedcitycurrent_scoreoverball_noballs_bowledballs_leftwicketscrrlast_five_runstotal_score
02South AfricaEngland0.100Durban001129900.0000000.0211
12South AfricaEngland0.210Durban102229803.0000000.0211
22South AfricaEngland0.300Durban103329702.0000000.0211
32South AfricaEngland0.400Durban104429601.5000000.0211
42South AfricaEngland0.500Durban105529501.2000000.0211
...................................................
1952South AfricaEngland32.214Durban12432219410643.83505220.0211
1962South AfricaEngland32.314Durban12532319510543.84615420.0211
1972South AfricaEngland32.414Durban12632419610443.85714320.0211
1982South AfricaEngland32.514Durban12732519710343.86802020.0211
1992South AfricaEngland32.644Durban13132619810243.96969724.0211
\n", "

200 rows × 16 columns

\n", "
" ], "text/plain": [ " match_id batting_team bowling_team ball runs player_dismissed \\\n", "0 2 South Africa England 0.1 0 0 \n", "1 2 South Africa England 0.2 1 0 \n", "2 2 South Africa England 0.3 0 0 \n", "3 2 South Africa England 0.4 0 0 \n", "4 2 South Africa England 0.5 0 0 \n", ".. ... ... ... ... ... ... \n", "195 2 South Africa England 32.2 1 4 \n", "196 2 South Africa England 32.3 1 4 \n", "197 2 South Africa England 32.4 1 4 \n", "198 2 South Africa England 32.5 1 4 \n", "199 2 South Africa England 32.6 4 4 \n", "\n", " city current_score over ball_no balls_bowled balls_left wickets \\\n", "0 Durban 0 0 1 1 299 0 \n", "1 Durban 1 0 2 2 298 0 \n", "2 Durban 1 0 3 3 297 0 \n", "3 Durban 1 0 4 4 296 0 \n", "4 Durban 1 0 5 5 295 0 \n", ".. ... ... ... ... ... ... ... \n", "195 Durban 124 32 2 194 106 4 \n", "196 Durban 125 32 3 195 105 4 \n", "197 Durban 126 32 4 196 104 4 \n", "198 Durban 127 32 5 197 103 4 \n", "199 Durban 131 32 6 198 102 4 \n", "\n", " crr last_five_runs total_score \n", "0 0.000000 0.0 211 \n", "1 3.000000 0.0 211 \n", "2 2.000000 0.0 211 \n", "3 1.500000 0.0 211 \n", "4 1.200000 0.0 211 \n", ".. ... ... ... 
\n", "195 3.835052 20.0 211 \n", "196 3.846154 20.0 211 \n", "197 3.857143 20.0 211 \n", "198 3.868020 20.0 211 \n", "199 3.969697 24.0 211 \n", "\n", "[200 rows x 16 columns]" ] }, "execution_count": 170, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(200)" ] }, { "cell_type": "code", "execution_count": 171, "id": "a7d0f160-11de-4517-9a39-07ca022a31d6", "metadata": {}, "outputs": [], "source": [ "#label encoding" ] }, { "cell_type": "code", "execution_count": 172, "id": "e3670974-3018-4ebd-9081-9dfeb85b2949", "metadata": {}, "outputs": [], "source": [ "df.drop(columns=['player_dismissed','runs','over','balls_bowled','ball_no','city'],inplace=True)" ] }, { "cell_type": "code", "execution_count": 173, "id": "20552e7c-51d5-49bf-b504-8a1430c8cd34", "metadata": {}, "outputs": [], "source": [ "df.rename(columns={'ball': 'overs'}, inplace=True)" ] }, { "cell_type": "code", "execution_count": 174, "id": "100f6244-de9b-48e7-8142-8cdb3026bf20", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
match_idbatting_teambowling_teamoverscurrent_scoreballs_leftwicketscrrlast_five_runstotal_score
02South AfricaEngland0.1029900.0000000.0211
12South AfricaEngland0.2129803.0000000.0211
22South AfricaEngland0.3129702.0000000.0211
32South AfricaEngland0.4129601.5000000.0211
42South AfricaEngland0.5129501.2000000.0211
.................................
2767391800AfghanistanBangladesh49.2241474.8851350.0255
2767401800AfghanistanBangladesh49.3245374.9494950.0255
2767411800AfghanistanBangladesh49.4247274.9731540.0255
2767421800AfghanistanBangladesh49.5251175.0367890.0255
2767431800AfghanistanBangladesh49.6255075.1000000.0255
\n", "

276744 rows × 10 columns

\n", "
" ], "text/plain": [ " match_id batting_team bowling_team overs current_score balls_left \\\n", "0 2 South Africa England 0.1 0 299 \n", "1 2 South Africa England 0.2 1 298 \n", "2 2 South Africa England 0.3 1 297 \n", "3 2 South Africa England 0.4 1 296 \n", "4 2 South Africa England 0.5 1 295 \n", "... ... ... ... ... ... ... \n", "276739 1800 Afghanistan Bangladesh 49.2 241 4 \n", "276740 1800 Afghanistan Bangladesh 49.3 245 3 \n", "276741 1800 Afghanistan Bangladesh 49.4 247 2 \n", "276742 1800 Afghanistan Bangladesh 49.5 251 1 \n", "276743 1800 Afghanistan Bangladesh 49.6 255 0 \n", "\n", " wickets crr last_five_runs total_score \n", "0 0 0.000000 0.0 211 \n", "1 0 3.000000 0.0 211 \n", "2 0 2.000000 0.0 211 \n", "3 0 1.500000 0.0 211 \n", "4 0 1.200000 0.0 211 \n", "... ... ... ... ... \n", "276739 7 4.885135 0.0 255 \n", "276740 7 4.949495 0.0 255 \n", "276741 7 4.973154 0.0 255 \n", "276742 7 5.036789 0.0 255 \n", "276743 7 5.100000 0.0 255 \n", "\n", "[276744 rows x 10 columns]" ] }, "execution_count": 174, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 175, "id": "ee573a67-d56b-446c-b089-6f8b5e0c1141", "metadata": {}, "outputs": [], "source": [ "# from sklearn.preprocessing import LabelEncoder\n", "\n", "# # Example categorical data\n", "# teams = ['Australia',\n", "# 'India',\n", "# 'Bangladesh',\n", "# 'New Zealand',\n", "# 'South Africa',\n", "# 'England',\n", "# 'Afghanistan',\n", "# 'Pakistan',\n", "# 'Sri Lanka',\n", "# 'Netherlands']\n", "\n", "# # # # Initialize LabelEncoder\n", "# label_encoder = LabelEncoder()\n", "\n", "# # # # Fit and transform the data\n", "# encoded_teams = label_encoder.fit_transform(teams)\n", "\n", "# # # # Create a DataFrame to display the mapping\n", "# encoding_mapping = pd.DataFrame({'Team': teams, 'Encoded': encoded_teams})\n", "\n", "# # # # Display the mapping\n", "# print(encoding_mapping)\n" ] }, { "cell_type": "code", "execution_count": null, 
"id": "3579fd10-f6de-47fe-9f45-3ddcbe3aa7e7", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 177, "id": "8f51d319-5f61-4c5e-8c1f-468168f66081", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Team Encoded\n", "0 Australia 1\n", "1 India 4\n", "2 Bangladesh 2\n", "3 New Zealand 6\n", "4 South Africa 8\n", "5 England 3\n", "6 Afghanistan 0\n", "7 Pakistan 7\n", "8 Sri Lanka 9\n", "9 Netherlands 5\n" ] } ], "source": [ "from sklearn.preprocessing import OneHotEncoder\n", "\n", "teams=[\n", " 'Australia',\n", " 'India',\n", " 'Bangladesh',\n", " 'New Zealand',\n", " 'South Africa',\n", " 'England',\n", " 'Afghanistan',\n", " 'Pakistan',\n", " 'Sri Lanka',\n", " 'Netherlands'\n", "# ]\n", "\n", "from sklearn.preprocessing import LabelEncoder\n", "\n", "\n", "\n", "# Initialize LabelEncoder\n", "label_encoder = LabelEncoder()\n", "\n", "# Fit and transform the data\n", "encoded_teams = label_encoder.fit_transform(teams)\n", "\n", "# Create a DataFrame to display the mapping\n", "encoding_mapping = pd.DataFrame({'Team': teams, 'Encoded': encoded_teams})\n", "\n", "# Display the mapping\n", "print(encoding_mapping)\n" ] }, { "cell_type": "code", "execution_count": 64, "id": "e522b9ea-0533-4fa0-95ec-48726467f2cf", "metadata": {}, "outputs": [], "source": [ "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n", "le = LabelEncoder()\n", "for col in ['batting_team', 'bowling_team']:\n", " df[col] = le.fit_transform(df[col])\n", "\n" ] }, { "cell_type": "code", "execution_count": 65, "id": "e0bb841f-340a-4097-b82c-24e6312ebdb3", "metadata": {}, "outputs": [], "source": [ "# ##########\n", "# df['over'] = pd.to_numeric(df['over'], errors='coerce') # Coerce invalid parsing to NaN\n", "# df['ball_no'] = pd.to_numeric(df['ball_no'], errors='coerce')" ] }, { "cell_type": "code", "execution_count": null, "id": "f35da715-e07b-49fc-8cb0-2fbdfea9bcac", "metadata": {}, "outputs": [], "source": [] 
}, { "cell_type": "code", "execution_count": 66, "id": "a5033e11-b7a8-4790-998f-7821e0ae2b58", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
match_idbatting_teambowling_teamoverscurrent_scoreballs_leftwicketscrrlast_five_runstotal_score
02830.1029900.00.0211
12830.2129803.00.0211
22830.3129702.00.0211
32830.4129601.50.0211
42830.5129501.20.0211
\n", "
" ], "text/plain": [ " match_id batting_team bowling_team overs current_score balls_left \\\n", "0 2 8 3 0.1 0 299 \n", "1 2 8 3 0.2 1 298 \n", "2 2 8 3 0.3 1 297 \n", "3 2 8 3 0.4 1 296 \n", "4 2 8 3 0.5 1 295 \n", "\n", " wickets crr last_five_runs total_score \n", "0 0 0.0 0.0 211 \n", "1 0 3.0 0.0 211 \n", "2 0 2.0 0.0 211 \n", "3 0 1.5 0.0 211 \n", "4 0 1.2 0.0 211 " ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 67, "id": "437c0099-d69e-4445-bad7-9736c72ed2ca", "metadata": {}, "outputs": [], "source": [ "#MODEL BUILDING" ] }, { "cell_type": "code", "execution_count": 68, "id": "a2b6742c-287a-40af-b5f6-0e6dab155983", "metadata": {}, "outputs": [], "source": [ "features = df.drop(['total_score','match_id'], axis=1)\n", "labels = df['total_score']" ] }, { "cell_type": "code", "execution_count": 69, "id": "4be5c6c7-12ea-4c34-885a-c184f76e09d9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training Set : (221395, 8)\n", "Testing Set : (55349, 8)\n" ] } ], "source": [ "from sklearn.model_selection import train_test_split\n", "train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size=0.20, shuffle=True)\n", "print(f\"Training Set : {train_features.shape}\\nTesting Set : {test_features.shape}\")" ] }, { "cell_type": "code", "execution_count": 70, "id": "7d0f9efb-84be-46e8-a725-978ede45fe9b", "metadata": {}, "outputs": [], "source": [ "#ML ALGORITHMS" ] }, { "cell_type": "code", "execution_count": 71, "id": "4708096e-2a8a-4fed-b90c-c409c000fd75", "metadata": {}, "outputs": [], "source": [ "models = dict()" ] }, { "cell_type": "code", "execution_count": 72, "id": "aa1ba0ab-394f-41dc-8605-a75cef1acc33", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
DecisionTreeRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "DecisionTreeRegressor()" ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.tree import DecisionTreeRegressor\n", "tree = DecisionTreeRegressor()\n", "# Train Model\n", "tree.fit(train_features, train_labels)" ] }, { "cell_type": "code", "execution_count": 77, "id": "c0970481-a304-4306-b9c7-846ff6398143", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Train Score : 98.57%\n", "Test Score : 81.41%\n" ] } ], "source": [ "# Evaluate Model\n", "train_score_tree = str(tree.score(train_features, train_labels) * 100)\n", "test_score_tree = str(tree.score(test_features, test_labels) * 100)\n", "print(f'Train Score : {train_score_tree[:5]}%\\nTest Score : {test_score_tree[:5]}%')\n", "models[\"tree\"] = test_score_tree" ] }, { "cell_type": "code", "execution_count": 73, "id": "ec23d0f7-dc3c-47ff-94f1-48eb290ce9ca", "metadata": {}, "outputs": [], "source": [ "# from sklearn.ensemble import RandomForestRegressor\n", "# forest = RandomForestRegressor()\n", "# # Train Model\n", "# forest.fit(train_features, train_labels)" ] }, { "cell_type": "code", "execution_count": 74, "id": "fa8337c3-8c28-4c90-8fb3-56cab99ae832", "metadata": {}, "outputs": [], "source": [ "# forestt=forest.predict(test_features)\n", "# treee=tree.predict(test_features)" ] }, { "cell_type": "code", "execution_count": 78, "id": "2ae58f62-a574-4674-90ff-508dbd717f40", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "---- Decision Tree Regressor - Model Evaluation ----\n", "Mean Absolute Error (MAE): 7.579907696164991\n", "Mean Squared Error (MSE): 683.7400273429961\n", "Root Mean Squared Error (RMSE): 26.148423037403155\n" ] } ], "source": [ "from sklearn.metrics import mean_absolute_error as mae, mean_squared_error as mse\n", "print(\"---- Decision Tree Regressor - Model Evaluation ----\")\n", "print(\"Mean Absolute Error (MAE): {}\".format(mae(test_labels, 
tree.predict(test_features))))\n", "print(\"Mean Squared Error (MSE): {}\".format(mse(test_labels, tree.predict(test_features))))\n", "print(\"Root Mean Squared Error (RMSE): {}\".format(np.sqrt(mse(test_labels, tree.predict(test_features)))))" ] }, { "cell_type": "code", "execution_count": 75, "id": "ea821bed-0ff2-48a9-83ad-97a3805c575b", "metadata": {}, "outputs": [], "source": [ "# hybrid= 0.6*forestt + 0.4*treee" ] }, { "cell_type": "code", "execution_count": 76, "id": "bd87bc18-4a44-43eb-8e94-27b36272515d", "metadata": {}, "outputs": [], "source": [ "# from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n", "\n", "# mae = mean_absolute_error(test_labels, hybrid)\n", "# mse = mean_squared_error(test_labels, hybrid)\n", "# r2 = r2_score(test_labels, hybrid)\n" ] }, { "cell_type": "code", "execution_count": null, "id": "4e11018f-dc52-4cf5-acab-8a5ea7c728c0", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 79, "id": "a3c3b439-41fc-4efa-bc5b-8a589fd91f4a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "LinearRegression()" ] }, "execution_count": 79, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Linear Regression\n", "from sklearn.linear_model import LinearRegression\n", "linreg = LinearRegression()\n", "# Train Model\n", "linreg.fit(train_features, train_labels)" ] }, { "cell_type": "code", "execution_count": 80, "id": "08a00a41-6d2a-442b-b1ae-742e411c8d9e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Train Score : 50.02%\n", "Test Score : 49.80%\n" ] } ], "source": [ "# Evaluate Model\n", "train_score_linreg = str(linreg.score(train_features, train_labels) * 100)\n", "test_score_linreg = str(linreg.score(test_features, test_labels) * 100)\n", "print(f'Train Score : {train_score_linreg[:5]}%\\nTest Score : {test_score_linreg[:5]}%')\n", "models[\"linreg\"] = test_score_linreg" ] }, { "cell_type": "code", "execution_count": 81, "id": "3f99f3fc-aaff-4bdb-9137-21fdc15114e3", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "---- Linear Regression - Model Evaluation ----\n", "Mean Absolute Error (MAE): 31.31082002675971\n", "Mean Squared Error (MSE): 1846.8923396475604\n", "Root Mean Squared Error (RMSE): 42.97548533347309\n" ] } ], "source": [ "print(\"---- Linear Regression - Model Evaluation ----\")\n", "print(\"Mean Absolute Error (MAE): {}\".format(mae(test_labels, linreg.predict(test_features))))\n", "print(\"Mean Squared Error (MSE): {}\".format(mse(test_labels, linreg.predict(test_features))))\n", "print(\"Root Mean Squared Error (RMSE): {}\".format(np.sqrt(mse(test_labels, linreg.predict(test_features)))))" ] }, { "cell_type": "code", "execution_count": 82, "id": "3a9f9509-cdda-4175-9741-1ef3a7d1c23e", "metadata": {}, "outputs": [], "source": [ "#Random Forest Regression" ] }, { "cell_type": "code", "execution_count": 83, "id": "576d7fa1-5934-417e-8f10-50eb0b72dbca", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
RandomForestRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "RandomForestRegressor()" ] }, "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.ensemble import RandomForestRegressor\n", "forest = RandomForestRegressor()\n", "# Train Model\n", "forest.fit(train_features, train_labels)" ] }, { "cell_type": "code", "execution_count": 84, "id": "7b754dc6-6bfd-46e8-be18-4499d5a14722", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Train Score : 97.42%\n", "Test Score : 88.80%\n" ] } ], "source": [ "# Evaluate Model\n", "train_score_forest = str(forest.score(train_features, train_labels)*100)\n", "test_score_forest = str(forest.score(test_features, test_labels)*100)\n", "print(f'Train Score : {train_score_forest[:5]}%\\nTest Score : {test_score_forest[:5]}%')\n", "models[\"forest\"] = test_score_forest" ] }, { "cell_type": "code", "execution_count": 85, "id": "484e65f5-3bfa-45ee-aae0-219637304e44", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "---- Random Forest Regression - Model Evaluation ----\n", "Mean Absolute Error (MAE): 8.90537374868365\n", "Mean Squared Error (MSE): 411.8180311151398\n", "Root Mean Squared Error (RMSE): 20.29330015337919\n" ] } ], "source": [ "print(\"---- Random Forest Regression - Model Evaluation ----\")\n", "print(\"Mean Absolute Error (MAE): {}\".format(mae(test_labels, forest.predict(test_features))))\n", "print(\"Mean Squared Error (MSE): {}\".format(mse(test_labels, forest.predict(test_features))))\n", "print(\"Root Mean Squared Error (RMSE): {}\".format(np.sqrt(mse(test_labels, forest.predict(test_features)))))" ] }, { "cell_type": "code", "execution_count": 86, "id": "5498f059-7b1a-44da-afb4-6cf92f325505", "metadata": {}, "outputs": [], "source": [ "#SVR" ] }, { "cell_type": "code", "execution_count": 87, "id": "4e21b34f-c482-4aa8-9f45-6b99c0e39e33", "metadata": {}, "outputs": [], "source": [ "#from sklearn.svm import SVR\n", "#svm = SVR()\n", "# 
Train Model\n", "#svm.fit(train_features, train_labels)" ] }, { "cell_type": "code", "execution_count": 88, "id": "195fd132-e605-4fbc-ab1b-d4003db93fbb", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
       "             colsample_bylevel=None, colsample_bynode=None,\n",
       "             colsample_bytree=None, device=None, early_stopping_rounds=None,\n",
       "             enable_categorical=False, eval_metric=None, feature_types=None,\n",
       "             gamma=None, grow_policy=None, importance_type=None,\n",
       "             interaction_constraints=None, learning_rate=None, max_bin=None,\n",
       "             max_cat_threshold=None, max_cat_to_onehot=None,\n",
       "             max_delta_step=None, max_depth=None, max_leaves=None,\n",
       "             min_child_weight=None, missing=nan, monotone_constraints=None,\n",
       "             multi_strategy=None, n_estimators=None, n_jobs=None,\n",
       "             num_parallel_tree=None, random_state=None, ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "XGBRegressor(base_score=None, booster=None, callbacks=None,\n", " colsample_bylevel=None, colsample_bynode=None,\n", " colsample_bytree=None, device=None, early_stopping_rounds=None,\n", " enable_categorical=False, eval_metric=None, feature_types=None,\n", " gamma=None, grow_policy=None, importance_type=None,\n", " interaction_constraints=None, learning_rate=None, max_bin=None,\n", " max_cat_threshold=None, max_cat_to_onehot=None,\n", " max_delta_step=None, max_depth=None, max_leaves=None,\n", " min_child_weight=None, missing=nan, monotone_constraints=None,\n", " multi_strategy=None, n_estimators=None, n_jobs=None,\n", " num_parallel_tree=None, random_state=None, ...)" ] }, "execution_count": 88, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from xgboost import XGBRegressor\n", "xgb = XGBRegressor()\n", "# Train Model\n", "xgb.fit(train_features, train_labels)" ] }, { "cell_type": "code", "execution_count": 89, "id": "6018690d-ad3a-4776-87bb-8f997459ad28", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Train Score : 71.44%\n", "Test Score : 69.56%\n" ] } ], "source": [ "train_score_xgb = str(xgb.score(train_features, train_labels)*100)\n", "test_score_xgb = str(xgb.score(test_features, test_labels)*100)\n", "print(f'Train Score : {train_score_xgb[:5]}%\\nTest Score : {test_score_xgb[:5]}%')\n", "models[\"xgb\"] = test_score_xgb" ] }, { "cell_type": "code", "execution_count": 90, "id": "cf65584f-eb2d-4f79-bebe-d04b3392f448", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "---- XGB Regression - Model Evaluation ----\n", "Mean Absolute Error (MAE): 23.35092285882124\n", "Mean Squared Error (MSE): 1119.7148025769684\n", "Root Mean Squared Error (RMSE): 33.46213983858427\n" ] } ], "source": [ "print(\"---- XGB Regression - Model Evaluation ----\")\n", "print(\"Mean Absolute Error (MAE): {}\".format(mae(test_labels, xgb.predict(test_features))))\n", 
"print(\"Mean Squared Error (MSE): {}\".format(mse(test_labels, xgb.predict(test_features))))\n", "print(\"Root Mean Squared Error (RMSE): {}\".format(np.sqrt(mse(test_labels, xgb.predict(test_features)))))" ] }, { "cell_type": "code", "execution_count": 91, "id": "13a1d3ee-b10a-4ced-ad2b-45af839fd2bf", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
KNeighborsRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "KNeighborsRegressor()" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.neighbors import KNeighborsRegressor\n", "knr = KNeighborsRegressor()\n", "# Train Model\n", "knr.fit(train_features, train_labels)" ] }, { "cell_type": "code", "execution_count": 92, "id": "cd94b6ee-68fe-4ef6-a29c-778a5c31de5a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Train Score : 82.68%\n", "Test Score : 70.98%\n" ] } ], "source": [ "train_score_knr = str(knr.score(train_features, train_labels)*100)\n", "test_score_knr = str(knr.score(test_features, test_labels)*100)\n", "print(f'Train Score : {train_score_knr[:5]}%\\nTest Score : {test_score_knr[:5]}%')\n", "models[\"knr\"] = test_score_knr" ] }, { "cell_type": "code", "execution_count": 93, "id": "30f7ced2-646a-4e99-aa69-929dee4cdde4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "---- KNR - Model Evaluation ----\n", "Mean Absolute Error (MAE): 21.148544689154278\n", "Mean Squared Error (MSE): 1067.5358066089725\n", "Root Mean Squared Error (RMSE): 32.67316646131765\n" ] } ], "source": [ "print(\"---- KNR - Model Evaluation ----\")\n", "print(\"Mean Absolute Error (MAE): {}\".format(mae(test_labels, knr.predict(test_features))))\n", "print(\"Mean Squared Error (MSE): {}\".format(mse(test_labels, knr.predict(test_features))))\n", "print(\"Root Mean Squared Error (RMSE): {}\".format(np.sqrt(mse(test_labels, knr.predict(test_features)))))" ] }, { "cell_type": "code", "execution_count": 94, "id": "cceb9ef1-72fd-449f-961c-a08a749a0334", "metadata": {}, "outputs": [], "source": [ "#BEST MODEL" ] }, { "cell_type": "code", "execution_count": 95, "id": "2c0ed506-4894-47f2-b358-cd91f29ba9cf", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 95, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import matplotlib.pyplot as plt\n", "\n", "model_names = list(models.keys())\n", "# Scores are stored as strings in `models`, so convert them before plotting\n", "scores = list(map(float, models.values()))\n", "\n", "# Explicit figure/axes with labels so the chart can be read on its own;\n", "# plt.show() also keeps the BarContainer repr out of the cell output.\n", "fig, ax = plt.subplots()\n", "ax.bar(model_names, scores)\n", "ax.set_xlabel(\"Model\")\n", "ax.set_ylabel(\"Score (%)\")\n", "ax.set_title(\"Model comparison\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 96, "id": "18bce93c-c8e5-4ef4-b5f0-fcf081b44df5", "metadata": {}, "outputs": [], "source": [ "#From above, we can see that Random Forest performed the best,\n", "#closely followed by Decision Tree and KNR. So we will be choosing Random Forest for the final model" ] }, { "cell_type": "code", "execution_count": 97, "id": "45b8d010-e6ec-4d16-9a7e-27a49ad7d49d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
match_idbatting_teambowling_teamoverscurrent_scoreballs_leftwicketscrrlast_five_runstotal_score
02830.1029900.0000000.0211
12830.2129803.0000000.0211
22830.3129702.0000000.0211
32830.4129601.5000000.0211
42830.5129501.2000000.0211
.................................
27673918000249.2241474.8851350.0255
27674018000249.3245374.9494950.0255
27674118000249.4247274.9731540.0255
27674218000249.5251175.0367890.0255
27674318000249.6255075.1000000.0255
\n", "

276744 rows × 10 columns

\n", "
" ], "text/plain": [ " match_id batting_team bowling_team overs current_score \\\n", "0 2 8 3 0.1 0 \n", "1 2 8 3 0.2 1 \n", "2 2 8 3 0.3 1 \n", "3 2 8 3 0.4 1 \n", "4 2 8 3 0.5 1 \n", "... ... ... ... ... ... \n", "276739 1800 0 2 49.2 241 \n", "276740 1800 0 2 49.3 245 \n", "276741 1800 0 2 49.4 247 \n", "276742 1800 0 2 49.5 251 \n", "276743 1800 0 2 49.6 255 \n", "\n", " balls_left wickets crr last_five_runs total_score \n", "0 299 0 0.000000 0.0 211 \n", "1 298 0 3.000000 0.0 211 \n", "2 297 0 2.000000 0.0 211 \n", "3 296 0 1.500000 0.0 211 \n", "4 295 0 1.200000 0.0 211 \n", "... ... ... ... ... ... \n", "276739 4 7 4.885135 0.0 255 \n", "276740 3 7 4.949495 0.0 255 \n", "276741 2 7 4.973154 0.0 255 \n", "276742 1 7 5.036789 0.0 255 \n", "276743 0 7 5.100000 0.0 255 \n", "\n", "[276744 rows x 10 columns]" ] }, "execution_count": 97, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 98, "id": "ee69e3e2-c5b7-4265-8e5d-59b1d249d047", "metadata": {}, "outputs": [], "source": [ "# batting_team='Afghanistan'\n", "# bowling_team='India'\n", "# score = score_predict(batting_team, bowling_team, over=45, current_score=235, wickets=7, last_five_runs=29)\n", "# print(f'Predicted Score : {score} || Actual Score : 272')" ] }, { "cell_type": "code", "execution_count": 99, "id": "cd65ea8f-8f7c-4ec6-b1d3-c7dd12f11482", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
match_idbatting_teambowling_teamoverscurrent_scoreballs_leftwicketscrrlast_five_runstotal_score
02830.1029900.0000000.0211
12830.2129803.0000000.0211
22830.3129702.0000000.0211
32830.4129601.5000000.0211
42830.5129501.2000000.0211
52830.6129401.0000000.0211
62831.1129300.8571430.0211
72831.2129200.7500000.0211
82831.3129100.6666670.0211
92831.4129000.6000000.0211
102831.5128900.5454550.0211
112831.6128800.5000000.0211
122832.1128700.4615380.0211
132832.2128600.4285710.0211
142832.3128500.4000000.0211
152832.4128400.3750000.0211
162832.5128300.3529410.0211
172832.6128200.3333330.0211
182833.1128110.3157890.0211
192833.2128020.3000000.0211
202833.3327920.8571430.0211
212833.4327820.8181820.0211
222833.5327720.7826090.0211
232833.6327620.7500000.0211
242834.1327520.7200000.0211
252834.2327420.6923080.0211
262834.3327320.6666670.0211
272834.4327220.6428570.0211
282834.5327120.6206900.0211
292834.6327020.6000003.0211
302835.1326920.5806453.0211
312835.2326820.5625002.0211
322835.3326720.5454552.0211
332835.4426620.7058823.0211
342835.5826521.3714297.0211
352835.6826421.3333337.0211
362836.1826321.2972977.0211
372836.2826221.2631587.0211
382836.3826121.2307697.0211
392836.4826021.2000007.0211
402836.5925921.3170738.0211
412836.6925821.2857148.0211
422837.11325721.81395312.0211
432837.21725622.31818216.0211
442837.31725522.26666716.0211
452837.41725422.21739116.0211
462837.51725322.17021316.0211
472837.61725222.12500016.0211
482838.11725122.08163316.0211
492838.21825022.16000017.0211
\n", "
" ], "text/plain": [ " match_id batting_team bowling_team overs current_score balls_left \\\n", "0 2 8 3 0.1 0 299 \n", "1 2 8 3 0.2 1 298 \n", "2 2 8 3 0.3 1 297 \n", "3 2 8 3 0.4 1 296 \n", "4 2 8 3 0.5 1 295 \n", "5 2 8 3 0.6 1 294 \n", "6 2 8 3 1.1 1 293 \n", "7 2 8 3 1.2 1 292 \n", "8 2 8 3 1.3 1 291 \n", "9 2 8 3 1.4 1 290 \n", "10 2 8 3 1.5 1 289 \n", "11 2 8 3 1.6 1 288 \n", "12 2 8 3 2.1 1 287 \n", "13 2 8 3 2.2 1 286 \n", "14 2 8 3 2.3 1 285 \n", "15 2 8 3 2.4 1 284 \n", "16 2 8 3 2.5 1 283 \n", "17 2 8 3 2.6 1 282 \n", "18 2 8 3 3.1 1 281 \n", "19 2 8 3 3.2 1 280 \n", "20 2 8 3 3.3 3 279 \n", "21 2 8 3 3.4 3 278 \n", "22 2 8 3 3.5 3 277 \n", "23 2 8 3 3.6 3 276 \n", "24 2 8 3 4.1 3 275 \n", "25 2 8 3 4.2 3 274 \n", "26 2 8 3 4.3 3 273 \n", "27 2 8 3 4.4 3 272 \n", "28 2 8 3 4.5 3 271 \n", "29 2 8 3 4.6 3 270 \n", "30 2 8 3 5.1 3 269 \n", "31 2 8 3 5.2 3 268 \n", "32 2 8 3 5.3 3 267 \n", "33 2 8 3 5.4 4 266 \n", "34 2 8 3 5.5 8 265 \n", "35 2 8 3 5.6 8 264 \n", "36 2 8 3 6.1 8 263 \n", "37 2 8 3 6.2 8 262 \n", "38 2 8 3 6.3 8 261 \n", "39 2 8 3 6.4 8 260 \n", "40 2 8 3 6.5 9 259 \n", "41 2 8 3 6.6 9 258 \n", "42 2 8 3 7.1 13 257 \n", "43 2 8 3 7.2 17 256 \n", "44 2 8 3 7.3 17 255 \n", "45 2 8 3 7.4 17 254 \n", "46 2 8 3 7.5 17 253 \n", "47 2 8 3 7.6 17 252 \n", "48 2 8 3 8.1 17 251 \n", "49 2 8 3 8.2 18 250 \n", "\n", " wickets crr last_five_runs total_score \n", "0 0 0.000000 0.0 211 \n", "1 0 3.000000 0.0 211 \n", "2 0 2.000000 0.0 211 \n", "3 0 1.500000 0.0 211 \n", "4 0 1.200000 0.0 211 \n", "5 0 1.000000 0.0 211 \n", "6 0 0.857143 0.0 211 \n", "7 0 0.750000 0.0 211 \n", "8 0 0.666667 0.0 211 \n", "9 0 0.600000 0.0 211 \n", "10 0 0.545455 0.0 211 \n", "11 0 0.500000 0.0 211 \n", "12 0 0.461538 0.0 211 \n", "13 0 0.428571 0.0 211 \n", "14 0 0.400000 0.0 211 \n", "15 0 0.375000 0.0 211 \n", "16 0 0.352941 0.0 211 \n", "17 0 0.333333 0.0 211 \n", "18 1 0.315789 0.0 211 \n", "19 2 0.300000 0.0 211 \n", "20 2 0.857143 0.0 211 \n", "21 2 0.818182 0.0 211 
\n", "22 2 0.782609 0.0 211 \n", "23 2 0.750000 0.0 211 \n", "24 2 0.720000 0.0 211 \n", "25 2 0.692308 0.0 211 \n", "26 2 0.666667 0.0 211 \n", "27 2 0.642857 0.0 211 \n", "28 2 0.620690 0.0 211 \n", "29 2 0.600000 3.0 211 \n", "30 2 0.580645 3.0 211 \n", "31 2 0.562500 2.0 211 \n", "32 2 0.545455 2.0 211 \n", "33 2 0.705882 3.0 211 \n", "34 2 1.371429 7.0 211 \n", "35 2 1.333333 7.0 211 \n", "36 2 1.297297 7.0 211 \n", "37 2 1.263158 7.0 211 \n", "38 2 1.230769 7.0 211 \n", "39 2 1.200000 7.0 211 \n", "40 2 1.317073 8.0 211 \n", "41 2 1.285714 8.0 211 \n", "42 2 1.813953 12.0 211 \n", "43 2 2.318182 16.0 211 \n", "44 2 2.266667 16.0 211 \n", "45 2 2.217391 16.0 211 \n", "46 2 2.170213 16.0 211 \n", "47 2 2.125000 16.0 211 \n", "48 2 2.081633 16.0 211 \n", "49 2 2.160000 17.0 211 " ] }, "execution_count": 99, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(50)\n" ] }, { "cell_type": "code", "execution_count": 100, "id": "37375ceb-9f7a-425c-91b9-10730aba908b", "metadata": {}, "outputs": [], "source": [ "# Team Encoded\n", "# 0 Australia 1\n", "# 1 India 4\n", "# 2 Bangladesh 2\n", "# 3 New Zealand 6\n", "# 4 South Africa 8\n", "# 5 England 3\n", "# 6 Afghanistan 0\n", "# 7 Pakistan 7\n", "# 8 Sri Lanka 9\n", "# 9 Netherlands 5" ] }, { "cell_type": "code", "execution_count": 101, "id": "e2ba478c-98a1-4e85-a9f4-ea7c19ba75d4", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([388.92])" ] }, "execution_count": 101, "metadata": {}, "output_type": "execute_result" } ], "source": [ "forest.predict([[4,6,43,327,42,2,7.55,52]])" ] }, { "cell_type": "code", "execution_count": 110, "id": "a5a71b78-7ddc-4919-b746-f93ab932a0c5", "metadata": {}, "outputs": [], "source": [ "# df" ] }, { "cell_type": "code", "execution_count": null, "id": "cedc4232-bb3d-4d1b-9ca2-9d0fa38b1a7f", "metadata": {}, "outputs": [], "source": [ "forest.predict([[4,6,43,327,42,2,7.55,52]])" ] }, { "cell_type": "code", "execution_count": null, "id": 
"f1574bec-9393-4bfd-9d7a-7ba243032550", "metadata": {}, "outputs": [], "source": [ "# Team Encoded\n", "# 0 Australia 1\n", "# 1 India 4\n", "# 2 Bangladesh 2\n", "# 3 New Zealand 6\n", "# 4 South Africa 8\n", "# 5 England 3\n", "# 6 Afghanistan 0\n", "# 7 Pakistan 7\n", "# 8 Sri Lanka 9\n", "# 9 Netherlands 5\n" ] }, { "cell_type": "code", "execution_count": 113, "id": "2af34882-87ba-4092-9397-a79bb1929927", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([283.19666667])" ] }, "execution_count": 113, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# NOTE(review): values presumably follow the df feature columns\n", "# [batting_team, bowling_team, overs, current_score, balls_left, wickets, crr, last_five_runs]\n", "# (3 = England, 6 = New Zealand per the encoding table) - TODO confirm against train_features\n", "forest.predict([[3,6,10,51,240,1,5.1,23]])" ] }, { "cell_type": "code", "execution_count": 178, "id": "2ff9bbfc-c9f2-4776-a97e-057830f713a5", "metadata": {}, "outputs": [], "source": [ "import pickle\n", "\n", "# Persist the trained random forest. The context manager guarantees the file is\n", "# flushed and closed even if pickling fails; a bare open() leaves the handle dangling.\n", "with open('model.pkl', 'wb') as model_file:\n", "    pickle.dump(forest, model_file)" ] }, { "cell_type": "code", "execution_count": null, "id": "7557e034-f383-49f7-b8cd-02f6cc3f1b95", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.3" } }, "nbformat": 4, "nbformat_minor": 5 }