{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "ename": "ModuleNotFoundError", "evalue": "No module named 'modal'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[1], line 7\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mhopsworks\u001b[39;00m\n\u001b[0;32m 6\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mre\u001b[39;00m \n\u001b[1;32m----> 7\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmodal\u001b[39;00m \n\u001b[0;32m 8\u001b[0m \u001b[38;5;66;03m#prepocessing\u001b[39;00m\n\u001b[0;32m 9\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mrequests\u001b[39;00m\n", "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'modal'" ] } ], "source": [ "from dotenv import load_dotenv\n", "import os \n", "from alpha_vantage.timeseries import TimeSeries\n", "import pandas as pd\n", "import hopsworks\n", "import re \n", "import modal \n", "#prepocessing\n", "import requests\n", "import pandas as pd\n", "import json\n", "#import pandas_market_calendars as mcal\n", "import datetime\n", "import numpy as np\n", "from datetime import timedelta \n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 1. open 2. high 3. low 4. close 5. volume ticker\n", "date \n", "2024-05-03 182.10 184.78 178.4200 181.19 75491539.0 TSLA\n", "2024-05-02 182.86 184.60 176.0200 180.01 89148041.0 TSLA\n", "2024-05-01 182.00 185.86 179.0100 179.99 92829719.0 TSLA\n", "2024-04-30 186.98 190.95 182.8401 183.28 127031787.0 TSLA\n", "2024-04-29 188.42 198.87 184.5400 194.05 243869678.0 TSLA\n" ] } ], "source": [ "\n", "\n", "load_dotenv()\n", "\n", "api_key = os.environ.get('stocks_api') # Replace this with your actual API key\n", "ts = TimeSeries(key=api_key, output_format='pandas')\n", "\n", "def fetch_stock_prices(symbol):\n", " # Fetch daily adjusted stock prices; adjust the symbol as needed\n", " data, meta_data = ts.get_daily(symbol=symbol, outputsize='full')\n", " \n", " # Add a new column named 'ticker' and fill it with the ticker name\n", " data['ticker'] = symbol\n", " \n", " return data\n", "\n", "# Example usage\n", "symbol = 'TSLA'\n", "stock_data = fetch_stock_prices(symbol)\n", "print(stock_data.head())\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def create_tsla_history():\n", "\n", " start_date = datetime.datetime.strptime('2015-07-16',\"%Y-%m-%d\")\n", " end_date = datetime.datetime.strptime('2023-01-05',\"%Y-%m-%d\")\n", "\n", " # Get the TSLA stock data from yfinance\n", " tsla = Ticker(\"TSLA\")\n", " # info = tsla.info\n", "\n", " # get historical market data\n", " data = tsla.history(start=start_date, end=end_date)\n", "\n", " # drop some columns\n", " tesla_df = data.drop(columns=['Dividends','Stock Splits'])\n", " tesla_df.index = tesla_df.index.strftime('%Y-%m-%d')\n", " \n", " print('Number of business days included in data set: ',np.shape(tesla_df))\n", "\n", " # Create an array of all dates in the specified period\n", " all_dates = np.array([start_date + datetime.timedelta(days=i) for i in range((end_date - start_date).days)])\n", " all_dates = [d.strftime('%Y-%m-%d') for d in all_dates]\n", "\n", " # Use setdiff1d() to find the non-business days\n", " isBusinessDay, _ = extract_business_day(start_date='2015-07-16',end_date='2023-01-04')\n", " non_business_days = np.setdiff1d(all_dates, isBusinessDay)\n", "\n", " # Add nan-values to the non-business days\n", " print('Add {} non business days with NaN-values'.format(len(non_business_days)))\n", " for d in non_business_days:\n", " tesla_df.loc[d,:] = [np.nan,np.nan,np.nan,np.nan,np.nan]\n", "\n", " # sort index (dates)\n", " tesla_df = tesla_df.sort_index()\n", " \n", " # move \"date\"-index into its own column\n", " tesla_df = tesla_df.reset_index()\n", " \n", " # Rename column 'Date' to 'date'\n", " tesla_df = tesla_df.rename(columns={'Date': 'date'})\n", " print('Final size of dataframe',np.shape(tesla_df))\n", " \n", " # Write the merged dataframe to a CSV file\n", " start_date ='2022-04-01'\n", " end_date = '2024-04-01'\n", " save_path = \"data/stock/tesla_{}-{}.csv\".format(start_date,end_date)\n", " \n", " print('Save at :',save_path)\n", " tesla_df.to_csv(save_path, index=False)\n", " \n", " return tesla_df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def extract_business_day(start_date,end_date):\n", " \"\"\"\n", " Given a start_date and end_date.\n", " \n", " `Returns`:\n", " \n", " isBusinessDay: list of str (with all dates being business days)\n", " is_open: boolean list\n", " e.g is_open = [1,0,...,1] means that start_date = open, day after start_date = closed, and end_date = open\n", " \"\"\"\n", " \n", " # Save for later\n", " end_date_save = end_date\n", " \n", " # Get the NYSE calendar\n", " cal = mcal.get_calendar('NYSE')\n", "\n", " # Get the NYSE calendar's open and close times for the specified period\n", " schedule = cal.schedule(start_date=start_date, end_date=end_date)\n", " \n", " # Only need a list of dates when it's open (not open and close times)\n", " isBusinessDay = np.array(schedule.market_open.dt.strftime('%Y-%m-%d')) \n", " \n", " # Go over all days: \n", " delta = datetime.timedelta(days=1)\n", " start_date = datetime.datetime.strptime(start_date,\"%Y-%m-%d\") #datetime.date(2015, 7, 16)\n", " end_date = datetime.datetime.strptime(end_date,\"%Y-%m-%d\") #datetime.date(2023, 1, 4)\n", " \n", " # Extract days from the timedelta object\n", " num_days = (end_date - start_date).days + 1\n", " \n", " # Create boolean array for days being open (1) and closed (0) \n", " is_open = np.zeros(num_days)\n", " \n", " # iterate over range of dates\n", " current_BusinessDay = isBusinessDay[0]\n", " count_dates = 0\n", " next_BusinessDay = 0\n", " \n", " while (start_date <= end_date):\n", " \n", " if start_date.strftime('%Y-%m-%d') == current_BusinessDay:\n", " is_open[count_dates] = True\n", "\n", " if current_BusinessDay == end_date_save or current_BusinessDay==isBusinessDay[-1]:\n", " break\n", " else:\n", " next_BusinessDay += 1\n", " current_BusinessDay = isBusinessDay[next_BusinessDay]\n", " else:\n", " is_open[count_dates] = False\n", "\n", " count_dates += 1 \n", " start_date += delta\n", " \n", " print(np.shape(is_open))\n", " \n", " return isBusinessDay, is_open" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Data saved to TSLA_stock_price.csv\n" ] } ], "source": [ "# Define your file path and name\n", "file_path = 'TSLA_stock_price.csv' # Customize the path and filename\n", "\n", "# Save the DataFrame to CSV\n", "stock_data.to_csv(file_path)\n", "\n", "print(f\"Data saved to {file_path}\")" ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }