File size: 3,208 Bytes
349c960
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# ---------------------- Library Imports ----------------------
import functools
import json
import logging
import os
import time

import numpy as np
import pandas as pd
import requests
from dotenv import load_dotenv
from requests import Session
from requests.exceptions import ConnectionError, Timeout, TooManyRedirects

# ---------------------- Environment Variables ----------------------
# Pull settings from a .env file (if any) into the process environment.
load_dotenv()
url_cmc = os.getenv("URL_CMC")
api_key_cmc = os.getenv("API_KEY_CMC")
# Fall back to a local "logs" directory when LOG_FOLDER is unset or empty:
# os.makedirs(None) raises TypeError and os.makedirs("") raises
# FileNotFoundError, either of which would abort the script at import time.
log_folder = os.getenv("LOG_FOLDER") or "logs"
os.makedirs(log_folder, exist_ok=True)
log_file = os.path.join(log_folder, "cmc_scrapping.log")
log_format = "%(asctime)s [%(levelname)s] - %(message)s"
# Root logger writes INFO-and-above records to the log file.
logging.basicConfig(filename=log_file, level=logging.INFO, format=log_format)

# ---------------------- Helper Functions ----------------------

def log_execution_time(func):
    """Decorate *func* so the duration of each successful call is logged.

    The wrapper forwards every positional and keyword argument to *func*,
    returns its result unchanged, and emits an INFO record through the root
    logger naming the function and the elapsed seconds (two decimals).
    Nothing is logged when *func* raises; the exception propagates as-is.

    Args:
        func: The callable to time.

    Returns:
        A wrapper that behaves like *func* and carries its metadata.
    """

    # functools.wraps copies __name__/__doc__ onto the wrapper so that
    # introspection does not report every decorated function as "wrapper".
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the measured interval cannot be
        # skewed by system clock adjustments the way time.time() can.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start_time
        logging.info(
            "Function %s executed in %.2f seconds", func.__name__, elapsed
        )
        return result

    return wrapper

def process_cmc_data(data, stop):
    """Flatten a listings payload into one table and hand it to the saver.

    Selects the name/symbol/supply columns from ``data["data"]``, expands each
    record's ``quote["USD"]`` mapping into price and market-cap columns, adds a
    ``percent_tokens_circulation`` column (circulating / total supply, in
    percent, rounded to one decimal), parses ``last_updated`` as datetimes and
    passes the result to ``save_cmc_data``.

    Args:
        data: Decoded JSON payload; must contain a ``"data"`` entry holding the
            per-asset records.
        stop: Label forwarded unchanged to ``save_cmc_data``.

    Raises:
        KeyError: If ``"data"`` or any of the expected columns is missing.
    """
    base_columns = ["name", "symbol", "circulating_supply", "total_supply", "quote"]
    quote_columns = [
        "price",
        "percent_change_24h",
        "percent_change_7d",
        "percent_change_90d",
        "market_cap",
        "fully_diluted_market_cap",
        "last_updated",
    ]

    df = pd.DataFrame(data["data"])[base_columns]
    # One flat row per asset, built from its USD quote only.
    quote_df = pd.json_normalize([quote["USD"] for quote in df["quote"]])[quote_columns]
    df = df.drop(columns="quote")

    # Coerce supplies to numbers and treat a zero total supply as missing, so
    # the ratio becomes NaN instead of inf (which would be written to the CSV).
    circulating = pd.to_numeric(df["circulating_supply"], errors="coerce")
    total = pd.to_numeric(df["total_supply"], errors="coerce").replace(0, np.nan)
    df["percent_tokens_circulation"] = np.round((circulating / total) * 100, 1)

    # Both frames share the default positional index, so join aligns row by row.
    df = df.join(quote_df)
    df["last_updated"] = pd.to_datetime(df["last_updated"])
    save_cmc_data(df, stop)

def save_cmc_data(df, stop):
    """Write *df* to ``output/top_{stop}_update.csv``, merging with prior runs.

    When the CSV already exists its rows are loaded, the new rows are appended
    and rows repeating the same ``symbol`` / ``last_updated`` pair are dropped
    (the first occurrence is kept) before the file is rewritten. Otherwise
    *df* is written as a new file. The ``output`` directory is created if
    needed. *df* itself is not modified.

    Args:
        df: Table of new rows; when merging it must have ``symbol`` and
            ``last_updated`` columns.
        stop: Label embedded in the output file name.
    """
    output_file = f"output/top_{stop}_update.csv"
    # to_csv does not create missing parent directories.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)

    if os.path.isfile(output_file):
        existing_data = pd.read_csv(output_file)
        # read_csv yields last_updated as plain strings while the new rows hold
        # timestamps; a string never equals a Timestamp, so without normalising
        # both sides to UTC datetimes no duplicate would ever be detected.
        existing_data["last_updated"] = pd.to_datetime(
            existing_data["last_updated"], utc=True
        )
        new_data = df.assign(last_updated=pd.to_datetime(df["last_updated"], utc=True))
        updated_data = pd.concat([existing_data, new_data], axis=0, ignore_index=True)
        updated_data = updated_data.drop_duplicates(subset=["symbol", "last_updated"])
        updated_data.to_csv(output_file, index=False)
    else:
        df.to_csv(output_file, index=False)
    logging.info("CMC data script execution completed.")

# ---------------------- CMC Scraping Function ----------------------

@log_execution_time
def fetch_and_process_cmc_data():
    """Download the latest listings, dump the raw JSON and process it.

    For each configured endpoint the function issues a GET against
    ``url_cmc`` with the API key header, stores the decoded payload under
    ``output/`` as JSON and passes it to ``process_cmc_data``. A failed
    request (connection problem, timeout, non-2xx status or a body that is
    not valid JSON) is logged as an error and that endpoint is skipped.
    """
    limit = '100'
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # server; 30 seconds bounds both the connect and the read phase.
    request_timeout = 30
    parameters = {
        'start': '1',
        'limit': limit,
        'convert': 'USD',
    }

    # The raw JSON dump below needs the directory to exist.
    os.makedirs("output", exist_ok=True)

    # The context manager closes the session's pooled connections on exit.
    with Session() as session:
        session.headers.update({
            'Accepts': 'application/json',
            'X-CMC_PRO_API_KEY': api_key_cmc,
        })

        for endpoint in ["v1/cryptocurrency/listings/latest"]:
            target = f"{url_cmc}/{endpoint}"
            try:
                response = session.get(target, params=parameters, timeout=request_timeout)
                # Turn 4xx/5xx answers into exceptions so an error payload is
                # never saved or fed to the processing step.
                response.raise_for_status()
                data = response.json()
            except (requests.exceptions.RequestException, ValueError) as e:
                # ValueError covers JSON decode failures on requests versions
                # whose decode error is not a RequestException.
                logging.error(f"Error while fetching data from {target}: {e}")
                continue

            dump_path = f'output/cmc_data_{endpoint.replace("/", "_")}_100.json'
            with open(dump_path, 'w', encoding="utf-8") as f:
                json.dump(data, f)
            process_cmc_data(data, limit)

# ---------------------- Execution ----------------------
# Run the fetch only when this file is executed as a script, not on import.
if __name__ == "__main__":
    fetch_and_process_cmc_data()