File size: 4,433 Bytes

a3abb69

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a06300a0-6379-4cb8-b015-0e6e689ab64a",
   "metadata": {},
   "source": [
    "This Jupyter notebook script sets up a basic prediction model while intentionally incorporating different types of potential vulnerabilities including usage of older version of a library, hardcoded secrets, and PII."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a4e7e5b2-3c14-44fb-808f-7241b2e75658",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 1: (Forcing an installation of an older version of libraries)\n",
    "\n",
    "!pip install numpy==1.16.0\n",
    "!pip install scikit-learn==0.19.0  # vulnerable version of scikit-learn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d8f3c422-d9e6-497a-a7b2-ec91fee80fa4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 2: (Importing libraries including the one with older version)\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import sklearn\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.linear_model import LinearRegression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3fc98c03-cc4c-4a3b-a5d9-41523c26930f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Including a non-permissible licensed library\n",
    "import oct2py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bb7a28c5-ac7f-4574-990d-d25c7670f211",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 3: (API tokens and secrets)\n",
    "azure_access_key = \"Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dd875b59-7454-4c81-88c2-37cf011ed332",
   "metadata": {},
   "outputs": [],
   "source": [
    "gcloud_api_key = \"AIzaQwerty12345678Xx\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "87d8ed66-bb6d-46b1-9968-b7d5b2cf49df",
   "metadata": {},
   "outputs": [],
   "source": [
    "youtube_api_key = \"AIzaSyCewf3U1ZXHH4E2mK2s8A2D\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f2bae80c-9132-4931-8182-fafbe4a414a1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dummy PII Data\n",
    "person = {\n",
    "    'first_name': 'John',\n",
    "    'last_name': 'Doe',\n",
    "    'ssn': '123-45-6789',\n",
    "    'address': '1600 Amphitheatre Parkway, Mountain View, CA'\n",
    "}  # this could be seen as PII"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0ff71e14-4cf7-47f8-be2f-7a2f93d7900a",
   "metadata": {},
   "outputs": [],
   "source": [
    "USER_NAME = 'Joe Smith'  #another PII in another format"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9a21aa9d-96ec-4555-9d1a-fa0f2cd39802",
   "metadata": {},
   "outputs": [],
   "source": [
    "EMAIL = 'john.doe@example.com'  # also PII"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fb202b7d-a7ef-4d6e-89ec-6e5aa01422d0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 4: (Model building)\n",
    "\n",
    "# Creating a dataset\n",
    "np.random.seed(0)\n",
    "x = np.random.rand(100, 1)\n",
    "y = 2 + 3 * x + np.random.rand(100, 1)\n",
    "\n",
    "# Splitting the data\n",
    "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)\n",
    "\n",
    "# Model initialization\n",
    "regression_model = LinearRegression()\n",
    "\n",
    "# Fit the data(train the model)\n",
    "regression_model.fit(x_train, y_train)\n",
    "\n",
    "# Predict\n",
    "y_predicted = regression_model.predict(x_test)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}