{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "e6c534cf-774f-4a84-b387-93b9c4650019",
   "metadata": {},
   "source": [
    "!conda activate crypto"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "5f0cde95-ce0e-4632-89ab-7d9e8fab773d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 973 ms, sys: 154 ms, total: 1.13 s\n",
      "Wall time: 1.7 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "#-------------------\n",
    "# Web scraping Yahoo Finance\n",
    "#-------------------\n",
    "# Builds df_scrape: one row per cryptocurrency listed on the Yahoo Finance\n",
    "# screener page, with every scraped value kept as text.\n",
    "from bs4 import BeautifulSoup\n",
    "import requests\n",
    "import pandas as pd\n",
    "\n",
    "url = 'https://finance.yahoo.com/cryptocurrencies?offset=0&count=100'\n",
    "\n",
    "# DataFrame column name -> aria-label of the matching <td> in the screener table\n",
    "columns = {\n",
    "    'Symbol': 'Symbol',\n",
    "    'Name': 'Name',\n",
    "    'Price': 'Price (Intraday)',\n",
    "    '% Change': '% Change',\n",
    "    'Market Cap': 'Market Cap',\n",
    "}\n",
    "\n",
    "# fail fast on network problems instead of hanging or parsing an error page\n",
    "response = requests.get(url, timeout=30)\n",
    "response.raise_for_status()\n",
    "\n",
    "# name the parser explicitly so the result does not depend on which parsers are installed\n",
    "soup = BeautifulSoup(response.text, 'html.parser')\n",
    "\n",
    "# the screener table lives in a single div; stop with a clear message if the page layout changed\n",
    "listing = soup.find('div', attrs={'id': 'fin-scr-res-table'})\n",
    "if listing is None:\n",
    "    raise RuntimeError('Yahoo Finance screener table (id=fin-scr-res-table) not found in the response')\n",
    "\n",
    "# store the cell texts of each column in a dictionary of lists\n",
    "dic = {\n",
    "    column: [cell.text for cell in listing.find_all('td', attrs={'aria-label': label})]\n",
    "    for column, label in columns.items()\n",
    "}\n",
    "\n",
    "# create a dataframe from dictionary\n",
    "df_scrape = pd.DataFrame(dic)\n",
    "\n",
    "# strip only the trailing quote-currency suffix: an unanchored replace removes\n",
    "# every ' USD' occurrence, so a coin whose own name contains ' USD' gets truncated\n",
    "df_scrape['Symbol'] = df_scrape['Symbol'].str.replace(r'-USD$', '', regex=True)\n",
    "df_scrape['Name'] = df_scrape['Name'].str.replace(r' USD$', '', regex=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id":
"eb541743-8bbb-4d82-a20e-c3c1faba7c52", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "(25, 5)" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_scrape.shape" ] }, { "cell_type": "code", "execution_count": 3, "id": "53e17481-97b7-449d-9676-ad39143ebb37", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Symbol | \n", "Name | \n", "Price | \n", "% Change | \n", "Market Cap | \n", "
---|---|---|---|---|---|
0 | \n", "BTC | \n", "Bitcoin | \n", "37,955.11 | \n", "-1.19% | \n", "722.194B | \n", "
1 | \n", "ETH | \n", "Ethereum | \n", "2,763.44 | \n", "-1.47% | \n", "333.28B | \n", "
2 | \n", "USDT | \n", "Tether | \n", "1.0002 | \n", "+0.01% | \n", "83.172B | \n", "
3 | \n", "BNB | \n", "Binance Coin | \n", "384.00 | \n", "-2.27% | \n", "62.698B | \n", "
4 | \n", "USDC | \n", "USD Coin | \n", "1.0001 | \n", "+0.03% | \n", "49.258B | \n", "
5 | \n", "SOL | \n", "Solana | \n", "87.73 | \n", "-5.48% | \n", "29.337B | \n", "
6 | \n", "HEX | \n", "HEX | \n", "0.165653 | \n", "-8.18% | \n", "28.726B | \n", "
7 | \n", "XRP | \n", "XRP | \n", "0.592676 | \n", "-3.98% | \n", "28.511B | \n", "
8 | \n", "LUNA1 | \n", "Terra | \n", "79.68 | \n", "-2.77% | \n", "27.499B | \n", "
9 | \n", "ADA | \n", "Cardano | \n", "0.773429 | \n", "-3.24% | \n", "26.105B | \n", "
10 | \n", "UST | \n", "TerraUSD | \n", "1.0009 | \n", "-0.09% | \n", "18.528B | \n", "
11 | \n", "BUSD | \n", "Binance | \n", "1.0010 | \n", "+0.03% | \n", "17.706B | \n", "
12 | \n", "DOGE | \n", "Dogecoin | \n", "0.131755 | \n", "-1.16% | \n", "17.48B | \n", "
13 | \n", "AVAX | \n", "Avalanche | \n", "58.20 | \n", "-4.84% | \n", "15.634B | \n", "
14 | \n", "DOT | \n", "Polkadot | \n", "14.96 | \n", "-5.06% | \n", "14.779B | \n", "
15 | \n", "SHIB | \n", "SHIBA INU | \n", "0.000021 | \n", "-4.53% | \n", "11.509B | \n", "
16 | \n", "WBTC | \n", "Wrapped Bitcoin | \n", "37,916.12 | \n", "-1.18% | \n", "10.689B | \n", "
17 | \n", "STETH | \n", "Lido stETH | \n", "2,748.98 | \n", "-1.45% | \n", "9.588B | \n", "
18 | \n", "DAI | \n", "Dai | \n", "0.999584 | \n", "+0.04% | \n", "8.788B | \n", "
19 | \n", "MATIC | \n", "Polygon | \n", "1.0845 | \n", "-3.60% | \n", "8.512B | \n", "
20 | \n", "CRO | \n", "Crypto.com Coin | \n", "0.332459 | \n", "-9.53% | \n", "8.399B | \n", "
21 | \n", "NEAR | \n", "NEAR Protocol | \n", "11.00 | \n", "-3.09% | \n", "7.42B | \n", "
22 | \n", "WTRX | \n", "Wrapped TRON | \n", "0.068104 | \n", "+7.78% | \n", "6.925B | \n", "
23 | \n", "TRX | \n", "TRON | \n", "0.068075 | \n", "+7.93% | \n", "6.917B | \n", "
24 | \n", "LTC | \n", "Litecoin | \n", "96.66 | \n", "-4.15% | \n", "6.785B | \n", "
\n", " | Open | \n", "High | \n", "Low | \n", "Close | \n", "Adj Close | \n", "Volume | \n", "
---|---|---|---|---|---|---|
Date | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
2017-05-01 | \n", "1348.300049 | \n", "1434.319946 | \n", "1348.300049 | \n", "1421.599976 | \n", "1421.599976 | \n", "713624000 | \n", "
2017-05-02 | \n", "1421.030029 | \n", "1473.900024 | \n", "1415.689941 | \n", "1452.819946 | \n", "1452.819946 | \n", "477337984 | \n", "
2017-05-03 | \n", "1453.780029 | \n", "1492.770020 | \n", "1447.489990 | \n", "1490.089966 | \n", "1490.089966 | \n", "583795968 | \n", "
2017-05-04 | \n", "1490.719971 | \n", "1608.910034 | \n", "1490.719971 | \n", "1537.670044 | \n", "1537.670044 | \n", "933548992 | \n", "
2017-05-05 | \n", "1540.869995 | \n", "1618.030029 | \n", "1530.310059 | \n", "1555.449951 | \n", "1555.449951 | \n", "946035968 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
2022-04-27 | \n", "38120.300781 | \n", "39397.917969 | \n", "37997.312500 | \n", "39241.121094 | \n", "39241.121094 | \n", "30981015184 | \n", "
2022-04-28 | \n", "39241.429688 | \n", "40269.464844 | \n", "38941.421875 | \n", "39773.828125 | \n", "39773.828125 | \n", "33903704907 | \n", "
2022-04-29 | \n", "39768.617188 | \n", "39887.269531 | \n", "38235.535156 | \n", "38609.824219 | \n", "38609.824219 | \n", "30882994649 | \n", "
2022-04-30 | \n", "38605.859375 | \n", "38771.210938 | \n", "37697.941406 | \n", "37714.875000 | \n", "37714.875000 | \n", "23895713731 | \n", "
2022-05-01 | \n", "37715.718750 | \n", "38116.277344 | \n", "37610.785156 | \n", "38004.464844 | \n", "38004.464844 | \n", "26912634880 | \n", "
1827 rows × 6 columns
\n", "\n", " | ADA | \n", "AVAX | \n", "BNB | \n", "BTC | \n", "BUSD | \n", "CRO | \n", "DAI | \n", "DOGE | \n", "DOT | \n", "ETH | \n", "... | \n", "SHIB | \n", "SOL | \n", "STETH | \n", "TRX | \n", "USDC | \n", "USDT | \n", "UST | \n", "WBTC | \n", "WTRX | \n", "XRP | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
2020-10-01 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "100.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "2.16 | \n", "NaN | \n", "0.71 | \n", "0.0001 | \n", "NaN | \n", "75.959999 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
2020-10-02 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "100.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "2.07 | \n", "NaN | \n", "0.67 | \n", "0.0001 | \n", "NaN | \n", "75.830002 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
2020-10-05 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "99.995003 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "2.13 | \n", "NaN | \n", "0.70 | \n", "0.0001 | \n", "NaN | \n", "74.949997 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
2020-10-06 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "100.029999 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "2.64 | \n", "NaN | \n", "0.69 | \n", "0.0001 | \n", "NaN | \n", "75.279999 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
2020-10-07 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "99.919998 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "3.50 | \n", "NaN | \n", "0.70 | \n", "0.0001 | \n", "NaN | \n", "74.769997 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
2020-09-25 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "2.14 | \n", "NaN | \n", "0.69 | \n", "0.0001 | \n", "NaN | \n", "76.190002 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
2020-09-28 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "2.21 | \n", "NaN | \n", "0.69 | \n", "0.0001 | \n", "NaN | \n", "76.160004 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
2020-09-29 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "2.10 | \n", "NaN | \n", "0.71 | \n", "0.0001 | \n", "NaN | \n", "76.290001 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
2020-09-30 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "2.07 | \n", "NaN | \n", "0.70 | \n", "0.0001 | \n", "NaN | \n", "75.930000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
2022-04-29 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "4.82 | \n", "NaN | \n", "0.30 | \n", "NaN | \n", "NaN | \n", "54.340000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
1260 rows × 25 columns
\n", "