{ "cells": [ { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "from calendar import monthrange\n", "from feature_engineering import *\n", "import glob\n", "import pandas as pd\n", "from dotenv import load_dotenv\n", "import os\n", "\n", "load_dotenv()\n", "\n", "# Set the API endpoint and your API key\n", "endpoint = \"https://api.marketaux.com/v1/news/all?symbols=TSLA&filter_entities=true&published_after=2021&language=en&api_token=iy6rRX4oxFrouZocXr8JNpOzaxZLk3UvMfoMGxYs\"\n", "api_key = os.environ.get('news_api')\n", "\n", "# Set the ticker symbol\n", "ticker = \"TSLA\" #TSLA" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "def getNews_historical(api_key,endpoint,ticker,year,month,num=1000):\n", " \n", " for start,end in zip([1,15],[16,monthrange(year, month)[1]]):\n", " \n", " from_date = '{}-{:02d}-{:02d}'.format(year,month,start)\n", " to_date = '{}-{:02d}-{:02d}'.format(year,month,end)\n", " \n", " print('Grabbing News data between {}-{}'.format(from_date,to_date)) \n", " news = getNews(api_key,endpoint,ticker,from_date,to_date)\n", " \n", " print('Number of articles: ',len(news.index))\n", " news.head(n=num)\n", "\n", " # Store the dataframe as a CSV file\n", " news.to_csv(\"/Users/manos/Documents/BDS/MLops_mod/TSLA_news_{}_to_{}.csv\".format(from_date,to_date))" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'meta': {'found': 58203, 'returned': 3, 'limit': 3, 'page': 1}, 'data': [{'uuid': 'a2f5f0e0-937a-4333-9aa7-da32fb0ede1f', 'title': \"What's next for Big Tech? See what SA analysts have to say\", 'description': 'Technology stocks have dropped over the past couple of weeks. See what SA analysts have to say about the overall state of tech and the economy.', 'keywords': '', 'snippet': 'Technology stocks have dropped over the past couple of weeks, and it was further seen with the selloff in Meta Platforms (META) and weak GDP data, as the two ac...', 'url': 'https://seekingalpha.com/news/4094186-tech-stocks-dive-see-what-sa-analysts-have-to-say', 'image_url': 'https://static.seekingalpha.com/cdn/s3/uploads/getty_images/184997191/image_184997191.jpg?io=getty-c-w750', 'language': 'en', 'published_at': '2024-04-26T12:20:54.000000Z', 'source': 'seekingalpha.com', 'relevance_score': None, 'entities': [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'exchange': None, 'exchange_long': None, 'country': 'us', 'type': 'equity', 'industry': 'Consumer Cyclical', 'match_score': 11.309888, 'sentiment_score': 0, 'highlights': [{'highlight': 'Tesla (TSLA) -3.5% .\\n\\nTech ETFs', 'sentiment': 0, 'highlighted_in': 'main_text'}]}], 'similar': []}, {'uuid': '650adf2f-d62f-478d-9322-05d3e7d7532d', 'title': 'Stellantis And Tesla: Combine These Stocks For The Ultimate Automotive Portfolio (STLA)', 'description': 'Tesla and Stellantis are two automakers that complement each other. Find out why I see both STLA and TSLA stocks as currently undervalued.', 'keywords': '', 'snippet': 'Tramino/iStock Unreleased via Getty Images\\n\\nStellantis N.V. (NYSE:STLA) and Tesla, Inc. (TSLA) are two very distinct automakers that, in my view, perfectly comp...', 'url': 'https://seekingalpha.com/article/4686610-stellantis-tesla-combine-these-stocks-for-ultimate-automotive-portfolio', 'image_url': 'https://static.seekingalpha.com/cdn/s3/uploads/getty_images/1305717707/image_1305717707.jpg?io=getty-c-w1536', 'language': 'en', 'published_at': '2024-04-26T10:58:06.000000Z', 'source': 'seekingalpha.com', 'relevance_score': None, 'entities': [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'exchange': None, 'exchange_long': None, 'country': 'us', 'type': 'equity', 'industry': 'Consumer Cyclical', 'match_score': 22.866589, 'sentiment_score': 0.173982, 'highlights': [{'highlight': '(NYSE:STLA) and Tesla, Inc. (TSLA) are two very distinct automakers that, in my view, perfectly complement each other. By entering a 50/50 balanced position in the two companies, you can create an “artificial” automaker in your portfolio that is bound to dominate the industry and provide superior returns for shareholders.', 'sentiment': 0.8519, 'highlighted_in': 'main_text'}, {'highlight': 'The brands of “TESSA” include:\\n\\nTesla, the leading global EV brand and #1 most valuable car brand in the world. Because of Tesla’s aggressive price policy lately, I believe it makes almost no economic sense to buy an EV that is not a Tesla, for the majority of consumers. More on this shortly.', 'sentiment': 0.2089, 'highlighted_in': 'main_text'}, {'highlight': 'The two overall car brand portfolios encompass all market segments\\n\\nGoing beyond EVs, I see “TESSA’s” car portfolio to cover all segments, again because of the complementarity of Tesla and Stellantis. The below chart outlines how all car segments are covered by either Tesla or Stellantis.', 'sentiment': 0, 'highlighted_in': 'main_text'}, {'highlight': 'To be fair, both Stellantis and Tesla margins declined in 2023, and in the case of Tesla, the company just reported that margins are now down to 5.5% after Q1 price cuts.\\n\\nHowever, I believe that Tesla’s margins at the moment do not tell the full story.', 'sentiment': 0.0258, 'highlighted_in': 'main_text'}, {'highlight': 'It is precisely because it enjoyed a 25%+ operating margin back in 2021 that Tesla was able to grow its company with aggressive pricing in the past 2 years.\\n\\nToday, for the majority of use cases, I believe buying an EV that is not a Tesla does not make rational sense.', 'sentiment': 0.4019, 'highlighted_in': 'main_text'}, {'highlight': 'These are cars that have starting prices that are significantly higher than Tesla, but with worse reviews, worse technology and limited access to Tesla’s SuperCharger system. Even EV-native car brands, such as Rivian and Polestar, have difficulty in competing with Tesla.', 'sentiment': -0.9294, 'highlighted_in': 'main_text'}, {'highlight': 'A Rivian R2 starts at $45,000, which is almost $7,000 more than the base Tesla Model 3.\\n\\nI believe that Tesla is using its margins to grow the EV category, converting ICE consumers, and simultaneously gain the monster share of that growing market.', 'sentiment': 0.743, 'highlighted_in': 'main_text'}, {'highlight': \"Key Financial Metrics for Tesla, Q1 24 (Tesla's Q1 Shareholders Presentation)\\n\\nKey Financial Metrics for Stellantis, Q1 24 (Stellantis' Q1 Shareholder Presentation)\\n\\nThis financial data tells the same story: Tesla and Stellantis complement each other.\", 'sentiment': 0, 'highlighted_in': 'main_text'}, {'highlight': 'VOO since 2021 (Seeking Alpha)\\n\\nWhat matters for my thesis is that Tesla and Stellantis are complementary in how they reward shareholders and how the market prices their stocks. Tesla is a tech company, looking at the long term, and shareholders need to be patient to see returns.', 'sentiment': 0.5859, 'highlighted_in': 'main_text'}, {'highlight': 'In that case, Tesla might generate returns significantly higher than Stellantis, to the point that it would have seemed silly to “dilute” a Tesla investment with another stock.', 'sentiment': 0.0258, 'highlighted_in': 'main_text'}, {'highlight': 'Stellantis And Tesla: Combine These Stocks For The Ultimate Automotive Portfolio (STLA)', 'sentiment': 0, 'highlighted_in': 'title'}]}], 'similar': []}, {'uuid': '47a58bd4-3a8d-40fe-8a89-934d0d695ea4', 'title': 'Tesla is being investigated by the NHTSA for Autopilot software fix (NASDAQ:TSLA)', 'description': \"The National Highway Traffic Safety Administration is investigating whether Tesla's recall of 2 million vehicles for Autopilot safeguards is sufficient.\", 'keywords': '', 'snippet': \"The National Highway Traffic Safety Administration confirmed on Friday that the safety regulator has opened an investigation into whether Tesla's (NASDAQ:TSLA) ...\", 'url': 'https://seekingalpha.com/news/4094754-tesla-is-being-investigated-by-the-nhtsa-for-autopilot-software-fix', 'image_url': 'https://static.seekingalpha.com/cdn/s3/uploads/getty_images/1415090444/image_1415090444.jpg?io=getty-c-w750', 'language': 'en', 'published_at': '2024-04-26T10:50:20.000000Z', 'source': 'seekingalpha.com', 'relevance_score': None, 'entities': [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'exchange': None, 'exchange_long': None, 'country': 'us', 'type': 'equity', 'industry': 'Consumer Cyclical', 'match_score': 51.845444, 'sentiment_score': 0.42985, 'highlights': [{'highlight': \"The National Highway Traffic Safety Administration confirmed on Friday that the safety regulator has opened an investigation into whether Tesla's (NASDAQ:TSLA) recall of more than 2 million vehicles announced in December to install new Autopilot safeguards is adequate.\", 'sentiment': 0.836, 'highlighted_in': 'main_text'}, {'highlight': \"While Tesla has released software updates to address potential issues, NHTSA cited Tesla's statement that a portion of the remedy both requires the owner to opt in and allows a driver to readily reverse it.\", 'sentiment': 0, 'highlighted_in': 'main_text'}, {'highlight': 'In December, Tesla (TSLA) said its largest-ever recall was to better ensure drivers pay attention when using its advanced driver assistance system.\\n\\nShares of Tesla (TSLA) rose 1.17% in premarket trading on Friday to $172.17. The EV stock is down 31.51% on a year-to-date basis. Short interest stands at 3.84% of the total float.', 'sentiment': 0.8834, 'highlighted_in': 'main_text'}, {'highlight': 'Tesla is being investigated by the NHTSA for Autopilot software fix (NASDAQ:TSLA)', 'sentiment': 0, 'highlighted_in': 'title'}]}], 'similar': [{'uuid': 'b269d18a-6ea0-4554-a20e-047c623513f9', 'title': 'US probes Tesla recall of 2 million vehicles over Autopilot, citing concerns By Reuters', 'description': 'US probes Tesla recall of 2 million vehicles over Autopilot, citing concerns', 'keywords': '', 'snippet': \"WASHINGTON (Reuters) - U.S. auto safety regulators said Friday they have opened an investigation into whether Tesla (NASDAQ: )'s recall of more than 2 million v...\", 'url': 'https://www.investing.com/news/stock-market-news/us-probes-tesla-recall-of-2-million-vehicles-over-autopilot-citing-concerns-3400236', 'image_url': 'https://i-invdn-com.investing.com/news/moved_LYNXMPEJ580NE_L.jpg', 'language': 'en', 'published_at': '2024-04-26T09:51:10.000000Z', 'source': 'investing.com', 'relevance_score': None, 'entities': [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'exchange': None, 'exchange_long': None, 'country': 'us', 'type': 'equity', 'industry': 'Consumer Cyclical', 'match_score': 25.2132, 'sentiment_score': 0.432933, 'highlights': [{'highlight': \"WASHINGTON (Reuters) - U.S. auto safety regulators said Friday they have opened an investigation into whether Tesla (NASDAQ: )'s recall of more than 2 million vehicles announced in December to install new Autopilot safeguards is adequate.\", 'sentiment': 0.7269, 'highlighted_in': 'main_text'}, {'highlight': 'The agency said Tesla has issued software updates to address issues that appear related to its concerns but has not made them \"a part of the recall or otherwise determined to remedy a defect that poses an unreasonable safety risk.\"', 'sentiment': 0.5719, 'highlighted_in': 'main_text'}, {'highlight': 'US probes Tesla recall of 2 million vehicles over Autopilot, citing concerns By Reuters', 'sentiment': 0, 'highlighted_in': 'title'}]}]}]}]}\n" ] } ], "source": [ "response = requests.get(endpoint)\n", "data = response.json()\n", "print(data) # See what the data looks like\n" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Grabbing News data between 2022-01-01-2022-01-16\n" ] }, { "ename": "ValueError", "evalue": "All arrays must be of the same length", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m/Users/manos/Documents/BDS/MLops_mod/feature_preprocessing.ipynb Cell 4\u001b[0m line \u001b[0;36m4\n\u001b[1;32m 2\u001b[0m \u001b[39mfor\u001b[39;00m year \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(\u001b[39m2022\u001b[39m,\u001b[39m2023\u001b[39m):\n\u001b[1;32m 3\u001b[0m \u001b[39mfor\u001b[39;00m month \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(\u001b[39m1\u001b[39m,\u001b[39m13\u001b[39m):\n\u001b[0;32m----> 4\u001b[0m getNews_historical(api_key,endpoint,ticker,year,month)\n\u001b[1;32m 5\u001b[0m \u001b[39mif\u001b[39;00m year \u001b[39m==\u001b[39m \u001b[39m2023\u001b[39m \u001b[39mand\u001b[39;00m month \u001b[39m==\u001b[39m \u001b[39m1\u001b[39m:\n\u001b[1;32m 6\u001b[0m \u001b[39mbreak\u001b[39;00m\n", "\u001b[1;32m/Users/manos/Documents/BDS/MLops_mod/feature_preprocessing.ipynb Cell 4\u001b[0m line \u001b[0;36m9\n\u001b[1;32m 6\u001b[0m to_date \u001b[39m=\u001b[39m \u001b[39m'\u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m-\u001b[39m\u001b[39m{:02d}\u001b[39;00m\u001b[39m-\u001b[39m\u001b[39m{:02d}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39mformat(year,month,end)\n\u001b[1;32m 8\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m'\u001b[39m\u001b[39mGrabbing News data between \u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m-\u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39mformat(from_date,to_date)) \n\u001b[0;32m----> 9\u001b[0m news \u001b[39m=\u001b[39m getNews(api_key,endpoint,ticker,from_date,to_date)\n\u001b[1;32m 11\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m'\u001b[39m\u001b[39mNumber of articles: \u001b[39m\u001b[39m'\u001b[39m,\u001b[39mlen\u001b[39m(news\u001b[39m.\u001b[39mindex))\n\u001b[1;32m 12\u001b[0m news\u001b[39m.\u001b[39mhead(n\u001b[39m=\u001b[39mnum)\n", "File \u001b[0;32m~/Documents/BDS/MLops_mod/feature_engineering.py:27\u001b[0m, in \u001b[0;36mgetNews\u001b[0;34m(api_key, endpoint, ticker, from_date, to_date, num)\u001b[0m\n\u001b[1;32m 21\u001b[0m response \u001b[39m=\u001b[39m requests\u001b[39m.\u001b[39mget(endpoint, params\u001b[39m=\u001b[39mparams)\n\u001b[1;32m 23\u001b[0m \u001b[39m# Print the response from the API\u001b[39;00m\n\u001b[1;32m 24\u001b[0m \u001b[39m#print(response.json())\u001b[39;00m\n\u001b[1;32m 25\u001b[0m \n\u001b[1;32m 26\u001b[0m \u001b[39m#Return a Pandas dataframe from the response\u001b[39;00m\n\u001b[0;32m---> 27\u001b[0m \u001b[39mreturn\u001b[39;00m pd\u001b[39m.\u001b[39mDataFrame(response\u001b[39m.\u001b[39mjson())\n", "File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/pandas/core/frame.py:662\u001b[0m, in \u001b[0;36mDataFrame.__init__\u001b[0;34m(self, data, index, columns, dtype, copy)\u001b[0m\n\u001b[1;32m 656\u001b[0m mgr \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_init_mgr(\n\u001b[1;32m 657\u001b[0m data, axes\u001b[39m=\u001b[39m{\u001b[39m\"\u001b[39m\u001b[39mindex\u001b[39m\u001b[39m\"\u001b[39m: index, \u001b[39m\"\u001b[39m\u001b[39mcolumns\u001b[39m\u001b[39m\"\u001b[39m: columns}, dtype\u001b[39m=\u001b[39mdtype, copy\u001b[39m=\u001b[39mcopy\n\u001b[1;32m 658\u001b[0m )\n\u001b[1;32m 660\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(data, \u001b[39mdict\u001b[39m):\n\u001b[1;32m 661\u001b[0m \u001b[39m# GH#38939 de facto copy defaults to False only in non-dict cases\u001b[39;00m\n\u001b[0;32m--> 662\u001b[0m mgr \u001b[39m=\u001b[39m dict_to_mgr(data, index, columns, dtype\u001b[39m=\u001b[39mdtype, copy\u001b[39m=\u001b[39mcopy, typ\u001b[39m=\u001b[39mmanager)\n\u001b[1;32m 663\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(data, ma\u001b[39m.\u001b[39mMaskedArray):\n\u001b[1;32m 664\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mnumpy\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mma\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mmrecords\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mmrecords\u001b[39;00m\n", "File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:493\u001b[0m, in \u001b[0;36mdict_to_mgr\u001b[0;34m(data, index, columns, dtype, typ, copy)\u001b[0m\n\u001b[1;32m 489\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 490\u001b[0m \u001b[39m# dtype check to exclude e.g. range objects, scalars\u001b[39;00m\n\u001b[1;32m 491\u001b[0m arrays \u001b[39m=\u001b[39m [x\u001b[39m.\u001b[39mcopy() \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(x, \u001b[39m\"\u001b[39m\u001b[39mdtype\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39melse\u001b[39;00m x \u001b[39mfor\u001b[39;00m x \u001b[39min\u001b[39;00m arrays]\n\u001b[0;32m--> 493\u001b[0m \u001b[39mreturn\u001b[39;00m arrays_to_mgr(arrays, columns, index, dtype\u001b[39m=\u001b[39mdtype, typ\u001b[39m=\u001b[39mtyp, consolidate\u001b[39m=\u001b[39mcopy)\n", "File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:118\u001b[0m, in \u001b[0;36marrays_to_mgr\u001b[0;34m(arrays, columns, index, dtype, verify_integrity, typ, consolidate)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[39mif\u001b[39;00m verify_integrity:\n\u001b[1;32m 116\u001b[0m \u001b[39m# figure out the index, if necessary\u001b[39;00m\n\u001b[1;32m 117\u001b[0m \u001b[39mif\u001b[39;00m index \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 118\u001b[0m index \u001b[39m=\u001b[39m _extract_index(arrays)\n\u001b[1;32m 119\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 120\u001b[0m index \u001b[39m=\u001b[39m ensure_index(index)\n", "File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:666\u001b[0m, in \u001b[0;36m_extract_index\u001b[0;34m(data)\u001b[0m\n\u001b[1;32m 664\u001b[0m lengths \u001b[39m=\u001b[39m \u001b[39mlist\u001b[39m(\u001b[39mset\u001b[39m(raw_lengths))\n\u001b[1;32m 665\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(lengths) \u001b[39m>\u001b[39m \u001b[39m1\u001b[39m:\n\u001b[0;32m--> 666\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mAll arrays must be of the same length\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 668\u001b[0m \u001b[39mif\u001b[39;00m have_dicts:\n\u001b[1;32m 669\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[1;32m 670\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mMixing dicts with non-Series may lead to ambiguous ordering.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 671\u001b[0m )\n", "\u001b[0;31mValueError\u001b[0m: All arrays must be of the same length" ] } ], "source": [ "# Grab old data\n", "for year in range(2022,2023):\n", " for month in range(1,13):\n", " getNews_historical(api_key,endpoint,ticker,year,month)\n", " if year == 2023 and month == 1:\n", " break" ] } ], "metadata": { "language_info": { "name": "python" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }