File size: 3,233 Bytes
de6e775
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import os
import sys

import pandas as pd
from finta import TA


def add_time_feature(df, symbol, dt_col_name="time"):
    """Tag a price frame with its symbol and calendar columns.

    The frame is modified in place and also returned. The values of
    ``dt_col_name`` are parsed with ``pd.to_datetime`` into a new ``dt``
    column, and that column is additionally installed as the index (the
    column itself is kept). This function only reads ``dt_col_name``; any
    other columns are passed through untouched.

    Columns added, in order: ``symbol``, ``dt``, ``minute``, ``hour``,
    ``weekday`` (pandas ``dayofweek``: Monday=0 ... Sunday=6), ``week``
    (ISO calendar week), ``month``, ``year`` and ``day`` (day of month).

    Args:
        df: DataFrame holding a timestamp column named ``dt_col_name``.
        symbol: value written to every row of the ``symbol`` column.
        dt_col_name: name of the column to parse as timestamps.

    Returns:
        The same DataFrame object, now indexed by ``dt``.
    """
    df["symbol"] = symbol
    df["dt"] = pd.to_datetime(df[dt_col_name])
    # Plain index assignment (rather than set_index) keeps "dt" as a column.
    df.index = df["dt"]

    # Fetch the accessor after the index swap so every derived series
    # already carries the new index and is assigned without realignment.
    stamps = df["dt"].dt
    derived = (
        ("minute", lambda acc: acc.minute),
        ("hour", lambda acc: acc.hour),
        ("weekday", lambda acc: acc.dayofweek),
        ("week", lambda acc: acc.isocalendar().week),
        ("month", lambda acc: acc.month),
        ("year", lambda acc: acc.year),
        ("day", lambda acc: acc.day),
    )
    for column, extract in derived:
        df[column] = extract(stamps)
    return df


# 'macd', 'boll_ub', 'boll_lb', 'rsi_30', 'dx_30','close_30_sma', 'close_60_sma'
def tech_indictors(df):
    """Append finta technical-indicator columns and zero-fill missing values.

    The indicator columns are written onto ``df`` itself: ``macd``,
    ``boll_ub``, ``boll_lb``, ``rsi_30``, ``dx_30``, ``close_30_sma`` and
    ``close_60_sma``. Afterwards every NaN in the frame (not only in the
    indicator columns) is replaced by 0 and the first and last three rows
    are printed.

    Args:
        df: price DataFrame in the layout finta's ``TA`` functions accept
            (presumably OHLC columns -- confirm against the finta docs).

    Returns:
        The zero-filled DataFrame produced by ``fillna(0)``.
    """
    # NOTE(review): the "macd" column stores the SIGNAL series returned by
    # TA.MACD, not a MACD line -- confirm this is the intended feature.
    df["macd"] = TA.MACD(df).SIGNAL

    # Compute the Bollinger bands once and reuse both edges; the original
    # ran the identical TA.BBANDS(df) calculation twice.
    bands = TA.BBANDS(df)
    df["boll_ub"] = bands.BB_UPPER
    df["boll_lb"] = bands.BB_LOWER

    df["rsi_30"] = TA.RSI(df, period=30)
    df["dx_30"] = TA.ADX(df, period=30)
    df["close_30_sma"] = TA.SMA(df, period=30)
    df["close_60_sma"] = TA.SMA(df, period=60)

    # Rows at the start of the frame can be NaN for windowed indicators;
    # replace every NaN with 0.
    df = df.fillna(0)
    print(
        f"--------df head - tail ----------------\n{df.head(3)}\n{df.tail(3)}\n---------------------------------"
    )

    return df


def split_timeserious(df, key_ts="dt", freq="W", symbol=""):
    """Split a time-indexed frame into per-period CSV files.

    Rows are grouped with ``pd.Grouper(level=key_ts, freq=freq)`` and each
    group is written to
    ``./data/split/<symbol>/<period name>/<symbol>_<year>_<n>.csv``, where
    ``<year>`` comes from the group's label and ``<n>`` is the running
    group number starting at 0. Before writing, the group's index is
    replaced by a fresh 0-based one and its ``dt`` column is dropped
    (always ``dt``, independent of ``key_ts``).

    Args:
        df: DataFrame whose index has a datetime level named ``key_ts`` and
            which contains a ``dt`` column.
        key_ts: name of the index level to group on.
        freq: one of ``H``, ``D``, ``W``, ``M``, ``Y``; any other value
            raises ``KeyError`` once the first group is processed.
        symbol: used in both the output folder and the file names.
    """
    folder_by_freq = dict(
        H="hourly",
        D="daily",
        W="weekly",
        M="monthly",
        Y="Yearly",
    )
    periods = df.groupby(pd.Grouper(level=key_ts, freq=freq))
    for seq, (label, chunk) in enumerate(periods):
        out_dir = f"./data/split/{symbol}/{folder_by_freq[freq]}"
        os.makedirs(out_dir, exist_ok=True)
        target = f"{out_dir}/{symbol}_{label:%Y}_{seq}.csv"
        print(f"save to:{target}")
        # Renumber rows from 0 and strip the timestamp column before saving.
        chunk.reset_index(drop=True).drop(columns=["dt"]).to_csv(target)


"""
python ./neo_finrl/data_processors/fx.py GBPUSD W ./data/raw/GBPUSD_raw.csv
symbol="GBPUSD"
freq = [H, D, W, M]
file .csv, column names [time, Open, High, Low, Close, Vol]
"""
if __name__ == "__main__":
    # Command-line entry point: <symbol> <freq> <csv path>. Extra arguments
    # are ignored, as before; missing ones now give a usage message instead
    # of an IndexError traceback.
    if len(sys.argv) < 4:
        print(f"usage: {sys.argv[0]} <symbol> <freq> <file.csv>", file=sys.stderr)
        sys.exit(2)
    symbol, freq, file = sys.argv[1:4]
    print(f"processing... symbol:{symbol} freq:{freq} file:{file}")
    try:
        df = pd.read_csv(file)
    except FileNotFoundError:
        print(f"No such file or directory: {file}", file=sys.stderr)
        # Non-zero status so shells and pipelines can see the failure; the
        # original exited with 0 here.
        sys.exit(1)
    except (OSError, ValueError) as err:
        # OSError covers permission/directory problems; pandas' parser and
        # empty-data errors derive from ValueError. Report the real cause
        # rather than claiming the file is missing.
        print(f"Failed to read {file}: {err}", file=sys.stderr)
        sys.exit(1)
    df = add_time_feature(df, symbol=symbol, dt_col_name="time")
    df = tech_indictors(df)
    split_timeserious(df, freq=freq, symbol=symbol)
    print("Done!")