{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "source": [ "\n", "# Novel Variation Detection\n", "\n" ], "metadata": { "id": "BnYTwM3OivB4" } }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "L96SNQ8HVI7m" }, "outputs": [], "source": [ "# Libraries imported for this notebook\n", "import tensorflow as tf\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from sklearn.preprocessing import StandardScaler\n", "from imblearn.over_sampling import RandomOverSampler\n", "import seaborn as sns\n", "from sklearn.model_selection import train_test_split" ] }, { "cell_type": "code", "source": [ "# Mount Google Drive so the dataset file can be read from it\n", "from google.colab import drive\n", "drive.mount('/content/drive')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Ea3adROCVORJ", "outputId": "eceb945e-4488-4ac0-ba2a-50005e6a95ef" }, "execution_count": 2, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Mounted at /content/drive\n" ] } ] }, { "cell_type": "code", "source": [ "# Read lc.csv from the mounted Drive and remove the listed columns in one step\n", "df = pd.read_csv('/content/drive/MyDrive/dataset/lc.csv').drop(\n", "    columns=[\n", "        'YELLOW_FINGERS',\n", "        'ANXIETY',\n", "        'CHRONIC DISEASE',\n", "        'SHORTNESS OF BREATH',\n", "        'SWALLOWING DIFFICULTY',\n", "        'FATIGUE ',  # the trailing space is part of the key\n", "    ]\n", ")\n", "df" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 423 }, "id": "mFDmqdaodqI4", "outputId": "97d8ae49-21ef-4c8f-82c2-719f721b6c40" }, "execution_count": 10, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " GENDER AGE SMOKING PEER_PRESSURE ALLERGY WHEEZING \\\n", "0 M 69 1 1 1 2 \n", "1 M 74 2 1 2 1 \n", "2 F 59 1 2 1 2 \n", "3 M 63 2 1 1 1 \n", "4 F 63 1 1 1 2 \n", ".. ... ... ... ... ... ... 
\n", "304 F 56 1 2 1 1 \n", "305 M 70 2 1 2 2 \n", "306 M 58 2 1 2 2 \n", "307 M 67 2 1 2 1 \n", "308 M 62 1 2 2 2 \n", "\n", " ALCOHOL CONSUMING COUGHING CHEST PAIN LUNG_CANCER \n", "0 2 2 2 YES \n", "1 1 1 2 YES \n", "2 1 2 2 NO \n", "3 2 1 2 NO \n", "4 1 2 1 NO \n", ".. ... ... ... ... \n", "304 2 2 1 YES \n", "305 2 2 2 YES \n", "306 2 2 2 YES \n", "307 2 2 2 YES \n", "308 2 1 1 YES \n", "\n", "[309 rows x 10 columns]" ], "text/html": [ "\n", "
\n", " | GENDER | \n", "AGE | \n", "SMOKING | \n", "PEER_PRESSURE | \n", "ALLERGY | \n", "WHEEZING | \n", "ALCOHOL CONSUMING | \n", "COUGHING | \n", "CHEST PAIN | \n", "LUNG_CANCER | \n", "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "M | \n", "69 | \n", "1 | \n", "1 | \n", "1 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "YES | \n", "
1 | \n", "M | \n", "74 | \n", "2 | \n", "1 | \n", "2 | \n", "1 | \n", "1 | \n", "1 | \n", "2 | \n", "YES | \n", "
2 | \n", "F | \n", "59 | \n", "1 | \n", "2 | \n", "1 | \n", "2 | \n", "1 | \n", "2 | \n", "2 | \n", "NO | \n", "
3 | \n", "M | \n", "63 | \n", "2 | \n", "1 | \n", "1 | \n", "1 | \n", "2 | \n", "1 | \n", "2 | \n", "NO | \n", "
4 | \n", "F | \n", "63 | \n", "1 | \n", "1 | \n", "1 | \n", "2 | \n", "1 | \n", "2 | \n", "1 | \n", "NO | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
304 | \n", "F | \n", "56 | \n", "1 | \n", "2 | \n", "1 | \n", "1 | \n", "2 | \n", "2 | \n", "1 | \n", "YES | \n", "
305 | \n", "M | \n", "70 | \n", "2 | \n", "1 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "YES | \n", "
306 | \n", "M | \n", "58 | \n", "2 | \n", "1 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "YES | \n", "
307 | \n", "M | \n", "67 | \n", "2 | \n", "1 | \n", "2 | \n", "1 | \n", "2 | \n", "2 | \n", "2 | \n", "YES | \n", "
308 | \n", "M | \n", "62 | \n", "1 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "1 | \n", "1 | \n", "YES | \n", "
309 rows × 10 columns
\n", "\n", " | GENDER | \n", "AGE | \n", "SMOKING | \n", "PEER_PRESSURE | \n", "ALLERGY | \n", "WHEEZING | \n", "ALCOHOL CONSUMING | \n", "CHEST PAIN | \n", "LUNG_CANCER | \n", "
---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "69 | \n", "1 | \n", "1 | \n", "1 | \n", "2 | \n", "2 | \n", "2 | \n", "1 | \n", "
1 | \n", "1 | \n", "74 | \n", "2 | \n", "1 | \n", "2 | \n", "1 | \n", "1 | \n", "2 | \n", "1 | \n", "
2 | \n", "0 | \n", "59 | \n", "1 | \n", "2 | \n", "1 | \n", "2 | \n", "1 | \n", "2 | \n", "0 | \n", "
3 | \n", "1 | \n", "63 | \n", "2 | \n", "1 | \n", "1 | \n", "1 | \n", "2 | \n", "2 | \n", "0 | \n", "
4 | \n", "0 | \n", "63 | \n", "1 | \n", "1 | \n", "1 | \n", "2 | \n", "1 | \n", "1 | \n", "0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
304 | \n", "0 | \n", "56 | \n", "1 | \n", "2 | \n", "1 | \n", "1 | \n", "2 | \n", "1 | \n", "1 | \n", "
305 | \n", "1 | \n", "70 | \n", "2 | \n", "1 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "1 | \n", "
306 | \n", "1 | \n", "58 | \n", "2 | \n", "1 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "1 | \n", "
307 | \n", "1 | \n", "67 | \n", "2 | \n", "1 | \n", "2 | \n", "1 | \n", "2 | \n", "2 | \n", "1 | \n", "
308 | \n", "1 | \n", "62 | \n", "1 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "1 | \n", "1 | \n", "
309 rows × 9 columns
\n", "