{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.7.10","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"import pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nplt.style.use('dark_background')","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df = pd.read_csv('../input/nba2k20-player-dataset/nba2k20-full.csv')\ndf.head()","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df.shape","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df.info()","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df = df.dropna()","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df.isnull().sum().sum()","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df = df.drop(['full_name', 'b_day', 'height', 'weight', 'college'], axis = 1)\ndf.head()","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"def removehash(value):\n value = value[1:]\n return int(value)\n\ndf['jersey'] = df['jersey'].apply(removehash)","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df.head()","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df['salary'] = df['salary'].apply(removehash)\ndf.head()","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df['team'].value_counts()","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df['country'].value_counts()","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"def removecountryoutlier(value):\n if value not in ['USA', 'Canada', 'Australia']:\n return 'Others'\n else:\n return value\n\ndf['country'] = df['country'].apply(removecountryoutlier)\ndf['country'].value_counts()\n ","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df['position'].value_counts()","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df.head()","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df['draft_round'].unique","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df['draft_peak'].unique","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"def removeundrafted(value):\n if value=='Undrafted':\n return \n else:\n return value\n \ndf['draft_round'] = df['draft_round'].apply(removeundrafted)\ndf['draft_peak'] = df['draft_peak'].apply(removeundrafted)\n\ndf = df.dropna()","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"from sklearn.preprocessing import LabelEncoder\nle = LabelEncoder()\ndf['position'] = le.fit_transform(df['position'])\ndf['country'] = le.fit_transform(df['country'])\ndf['team'] = le.fit_transform(df['team'])\n\ndf.head()","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"x = df.drop(['salary'], axis = 1)\ny = df['salary']","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"from sklearn.model_selection import train_test_split\nx_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2)","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"from sklearn.linear_model import LinearRegression\nlr = LinearRegression()\nlr.fit(x_train, y_train)\npredictions = lr.predict(x_test)","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"diff = y_test - predictions","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"import seaborn as sns\nsns.distplot(diff)","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"import pickle\npickle.dump(lr, open('./model.sav', 'wb'))","metadata":{"trusted":true},"execution_count":null,"outputs":[]}]}