{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "ename": "ModuleNotFoundError",
     "evalue": "No module named 'modal'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[1], line 7\u001b[0m\n\u001b[0;32m      5\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mhopsworks\u001b[39;00m\n\u001b[0;32m      6\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mre\u001b[39;00m \n\u001b[1;32m----> 7\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmodal\u001b[39;00m \n\u001b[0;32m      8\u001b[0m \u001b[38;5;66;03m#prepocessing\u001b[39;00m\n\u001b[0;32m      9\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mrequests\u001b[39;00m\n",
      "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'modal'"
     ]
    }
   ],
   "source": [
    "from dotenv import load_dotenv\n",
    "import os \n",
    "from alpha_vantage.timeseries import TimeSeries\n",
    "import pandas as pd\n",
    "import hopsworks\n",
    "import re \n",
    "import modal \n",
    "#prepocessing\n",
    "import requests\n",
    "import pandas as pd\n",
    "import json\n",
    "#import pandas_market_calendars as mcal\n",
    "import datetime\n",
    "import numpy as np\n",
    "from datetime import timedelta \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "            1. open  2. high    3. low  4. close    5. volume ticker\n",
      "date                                                                \n",
      "2024-05-03   182.10   184.78  178.4200    181.19   75491539.0   TSLA\n",
      "2024-05-02   182.86   184.60  176.0200    180.01   89148041.0   TSLA\n",
      "2024-05-01   182.00   185.86  179.0100    179.99   92829719.0   TSLA\n",
      "2024-04-30   186.98   190.95  182.8401    183.28  127031787.0   TSLA\n",
      "2024-04-29   188.42   198.87  184.5400    194.05  243869678.0   TSLA\n"
     ]
    }
   ],
   "source": [
    "\n",
    "\n",
    "load_dotenv()\n",
    "\n",
    "api_key = os.environ.get('stocks_api') # Replace this with your actual API key\n",
    "ts = TimeSeries(key=api_key, output_format='pandas')\n",
    "\n",
    "def fetch_stock_prices(symbol):\n",
    "    # Fetch daily adjusted stock prices; adjust the symbol as needed\n",
    "    data, meta_data = ts.get_daily(symbol=symbol, outputsize='full')\n",
    "    \n",
    "    # Add a new column named 'ticker' and fill it with the ticker name\n",
    "    data['ticker'] = symbol\n",
    "    \n",
    "    return data\n",
    "\n",
    "# Example usage\n",
    "symbol = 'TSLA'\n",
    "stock_data = fetch_stock_prices(symbol)\n",
    "print(stock_data.head())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_tsla_history():\n",
    "\n",
    "    start_date = datetime.datetime.strptime('2015-07-16',\"%Y-%m-%d\")\n",
    "    end_date = datetime.datetime.strptime('2023-01-05',\"%Y-%m-%d\")\n",
    "\n",
    "    # Get the TSLA stock data from yfinance\n",
    "    tsla = Ticker(\"TSLA\")\n",
    "    # info = tsla.info\n",
    "\n",
    "    # get historical market data\n",
    "    data = tsla.history(start=start_date, end=end_date)\n",
    "\n",
    "    # drop some columns\n",
    "    tesla_df = data.drop(columns=['Dividends','Stock Splits'])\n",
    "    tesla_df.index = tesla_df.index.strftime('%Y-%m-%d')\n",
    "    \n",
    "    print('Number of business days included in data set: ',np.shape(tesla_df))\n",
    "\n",
    "    # Create an array of all dates in the specified period\n",
    "    all_dates = np.array([start_date + datetime.timedelta(days=i) for i in range((end_date - start_date).days)])\n",
    "    all_dates = [d.strftime('%Y-%m-%d') for d in all_dates]\n",
    "\n",
    "    # Use setdiff1d() to find the non-business days\n",
    "    isBusinessDay, _ = extract_business_day(start_date='2015-07-16',end_date='2023-01-04')\n",
    "    non_business_days = np.setdiff1d(all_dates, isBusinessDay)\n",
    "\n",
    "    # Add nan-values to the non-business days\n",
    "    print('Add {} non business days with NaN-values'.format(len(non_business_days)))\n",
    "    for d in non_business_days:\n",
    "        tesla_df.loc[d,:] = [np.nan,np.nan,np.nan,np.nan,np.nan]\n",
    "\n",
    "    # sort index (dates)\n",
    "    tesla_df = tesla_df.sort_index()\n",
    " \n",
    "    # move \"date\"-index into its own column\n",
    "    tesla_df = tesla_df.reset_index()\n",
    "    \n",
    "    # Rename column 'Date' to 'date'\n",
    "    tesla_df = tesla_df.rename(columns={'Date': 'date'})\n",
    "    print('Final size of dataframe',np.shape(tesla_df))\n",
    "    \n",
    "    # Write the merged dataframe to a CSV file\n",
    "    start_date ='2022-04-01'\n",
    "    end_date = '2024-04-01'\n",
    "    save_path = \"data/stock/tesla_{}-{}.csv\".format(start_date,end_date)\n",
    "    \n",
    "    print('Save at :',save_path)\n",
    "    tesla_df.to_csv(save_path, index=False)\n",
    "    \n",
    "    return tesla_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def extract_business_day(start_date,end_date):\n",
    "    \"\"\"\n",
    "    Given a start_date and end_date.\n",
    "    \n",
    "    `Returns`:\n",
    "    \n",
    "    isBusinessDay: list of str (with all dates being business days)\n",
    "    is_open: boolean list\n",
    "        e.g is_open = [1,0,...,1] means that start_date = open, day after start_date = closed, and end_date = open\n",
    "    \"\"\"\n",
    "    \n",
    "    # Save for later\n",
    "    end_date_save = end_date\n",
    "    \n",
    "    # Get the NYSE calendar\n",
    "    cal = mcal.get_calendar('NYSE')\n",
    "\n",
    "    # Get the NYSE calendar's open and close times for the specified period\n",
    "    schedule = cal.schedule(start_date=start_date, end_date=end_date)\n",
    "    \n",
    "    # Only need a list of dates when it's open (not open and close times)\n",
    "    isBusinessDay = np.array(schedule.market_open.dt.strftime('%Y-%m-%d')) \n",
    "    \n",
    "    # Go over all days: \n",
    "    delta = datetime.timedelta(days=1)\n",
    "    start_date = datetime.datetime.strptime(start_date,\"%Y-%m-%d\") #datetime.date(2015, 7, 16)\n",
    "    end_date = datetime.datetime.strptime(end_date,\"%Y-%m-%d\") #datetime.date(2023, 1, 4)\n",
    "    \n",
    "    # Extract days from the timedelta object\n",
    "    num_days = (end_date - start_date).days + 1\n",
    "    \n",
    "    # Create boolean array for days being open (1) and closed (0) \n",
    "    is_open = np.zeros(num_days)\n",
    "    \n",
    "    # iterate over range of dates\n",
    "    current_BusinessDay = isBusinessDay[0]\n",
    "    count_dates = 0\n",
    "    next_BusinessDay = 0\n",
    "    \n",
    "    while (start_date <= end_date):\n",
    "    \n",
    "        if start_date.strftime('%Y-%m-%d') == current_BusinessDay:\n",
    "            is_open[count_dates] = True\n",
    "\n",
    "            if current_BusinessDay == end_date_save or current_BusinessDay==isBusinessDay[-1]:\n",
    "                break\n",
    "            else:\n",
    "                next_BusinessDay += 1\n",
    "                current_BusinessDay = isBusinessDay[next_BusinessDay]\n",
    "        else:\n",
    "            is_open[count_dates] = False\n",
    "\n",
    "        count_dates += 1   \n",
    "        start_date += delta\n",
    "        \n",
    "    print(np.shape(is_open))\n",
    "        \n",
    "    return isBusinessDay, is_open"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Data saved to TSLA_stock_price.csv\n"
     ]
    }
   ],
   "source": [
    "# Define your file path and name\n",
    "file_path = 'TSLA_stock_price.csv'  # Customize the path and filename\n",
    "\n",
    "# Save the DataFrame to CSV\n",
    "stock_data.to_csv(file_path)\n",
    "\n",
    "print(f\"Data saved to {file_path}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}