|
import pandas as pd |
|
from pathlib import Path |
|
from datetime import datetime, timedelta |
|
import gzip |
|
import shutil |
|
import os |
|
from huggingface_hub import hf_hub_download |
|
|
|
# Directory that contains this module.
SCRIPTS_DIR = Path(__file__).parent

# Parent of the scripts directory.
ROOT_DIR = SCRIPTS_DIR.parent

# "tmp" folder located directly under ROOT_DIR.
TMP_DIR = ROOT_DIR.joinpath("tmp")
|
|
|
|
|
def get_traders_family(row: pd.DataFrame) -> str:
    """Classify a trade record into a trader family.

    Args:
        row: Object exposing ``staking`` and ``market_creator`` attributes.
            NOTE(review): the annotation says ``pd.DataFrame``, but
            ``prepare_data`` calls this through ``DataFrame.apply(axis=1)``,
            which hands over one row at a time.

    Returns:
        ``"non_agent"`` when ``row.staking`` is ``"non_agent"``; otherwise
        ``"pearl_agent"`` when ``row.market_creator`` is ``"pearl"``;
        otherwise ``"quickstart_agent"``.
    """
    # The staking check wins over the market creator, and the market creator
    # is only looked at when the staking check does not match.
    if row.staking == "non_agent":
        return "non_agent"
    if row.market_creator == "pearl":
        return "pearl_agent"
    return "quickstart_agent"
|
|
|
|
|
def get_current_week():
    """Return the label of the current week's starting Sunday.

    The label is the most recent Sunday strictly before today, formatted as
    ``"%b-%d-%Y"``. When today is itself a Sunday the result is the Sunday
    seven days earlier.

    NOTE(review): this reads the naive local clock via ``datetime.now()``;
    confirm that matches the week labels built in ``prepare_data``.
    """
    today = datetime.now()
    # isoweekday() is 1 for Monday ... 7 for Sunday, i.e. weekday() + 1.
    days_since_sunday = today.isoweekday()
    week_start = today - timedelta(days=days_since_sunday)
    return week_start.strftime("%b-%d-%Y")
|
|
|
|
|
def get_next_week():
    """Return the label of the upcoming Sunday.

    The label is formatted as ``"%b-%d-%Y"``. When today is a Sunday the
    offset is zero, so today's date is returned.

    NOTE(review): this reads the naive local clock via ``datetime.now()``;
    confirm that matches the week labels built in ``prepare_data``.
    """
    today = datetime.now()
    # isoweekday() is 1 for Monday ... 7 for Sunday, i.e. weekday() + 1.
    days_until_sunday = 7 - today.isoweekday()
    upcoming_sunday = today + timedelta(days=days_until_sunday)
    return upcoming_sunday.strftime("%b-%d-%Y")
|
|
|
|
|
# Hugging Face dataset repository that hosts every file loaded below.
_DATASET_REPO_ID = "valory/Olas-predict-dataset"


def _download_dataset_file(filename: str) -> Path:
    """Download *filename* from the dataset repo and return its local path."""
    local_path = hf_hub_download(
        repo_id=_DATASET_REPO_ID,
        filename=filename,
        repo_type="dataset",
    )
    return Path(local_path)


def _read_parquet(filename: str) -> pd.DataFrame:
    """Download a plain parquet file from the dataset repo and load it."""
    return pd.read_parquet(_download_dataset_file(filename))


def _read_gzipped_parquet(filename: str, drop_from_name: str = "") -> pd.DataFrame:
    """Download a gzipped parquet file, decompress it on disk and load it.

    The decompressed copy is written next to the downloaded archive. Its name
    is the archive's file name without the trailing ``.gz`` and with every
    occurrence of *drop_from_name* removed.

    Args:
        filename: Name of the ``.gz`` archive inside the dataset repo.
        drop_from_name: Optional substring to strip from the decompressed
            file's name (not from its directory).

    Raises:
        ValueError: If the downloaded file does not end in ``.gz``; the
            decompressed path would otherwise collide with the archive.
    """
    gz_path = _download_dataset_file(filename)
    if gz_path.suffix != ".gz":
        raise ValueError(f"Expected a .gz archive, got: {gz_path.name}")

    # Only the final path component is rewritten. Running str.replace over the
    # whole path would also mangle any directory that happens to contain
    # ".gz" or the dropped substring.
    parquet_name = gz_path.with_suffix("").name
    if drop_from_name:
        parquet_name = parquet_name.replace(drop_from_name, "")
    parquet_path = gz_path.with_name(parquet_name)

    with gzip.open(gz_path, "rb") as f_in, open(parquet_path, "wb") as f_out:
        shutil.copyfileobj(f_in, f_out)

    return pd.read_parquet(parquet_path)


def load_all_data() -> tuple:
    """Download and load every parquet file used by the dashboard.

    All files come from the ``valory/Olas-predict-dataset`` dataset repo. The
    two gzipped files are decompressed next to their downloaded archives
    before being read.

    Returns:
        A 12-tuple of DataFrames, in this order:
        ``all_trades_profitability``, ``closed_markets_div``, ``daily_info``,
        ``unknown_traders``, ``retention_activity``, ``active_traders``,
        ``weekly_mech_calls``, ``latest_result_DAA_QS``,
        ``latest_result_DAA_Pearl``, ``weekly_avg_roi_pearl_agents``,
        ``two_weeks_avg_roi_pearl_agents``, ``traders_weekly_metrics``.
    """
    # Tuple elements are evaluated left to right, so the files are fetched in
    # the order listed.
    return (
        # The decompressed trades file is stored without the "all" prefix
        # ("_trades_profitability.parquet").
        _read_gzipped_parquet(
            "all_trades_profitability.parquet.gz", drop_from_name="all"
        ),
        _read_parquet("closed_markets_div.parquet"),
        _read_parquet("daily_info.parquet"),
        _read_parquet("unknown_traders.parquet"),
        _read_gzipped_parquet("retention_activity.parquet.gz"),
        _read_parquet("active_traders.parquet"),
        _read_parquet("weekly_mech_calls.parquet"),
        _read_parquet("latest_result_DAA_QS.parquet"),
        _read_parquet("latest_result_DAA_Pearl.parquet"),
        _read_parquet("weekly_avg_roi_pearl_agents.parquet"),
        _read_parquet("two_weeks_avg_roi_pearl_agents.parquet"),
        _read_parquet("traders_weekly_metrics.parquet"),
    )
|
|
|
|
|
def prepare_data():
    """Load all datasets and add the derived columns the dashboard uses.

    Derived columns:
        * trades: ``creation_timestamp`` converted to UTC, ``creation_date``,
          ``nr_trades_per_market`` (count of non-null ``roi`` per trader and
          market title), ``trader_family`` and ``month_year_week``.
        * daily info / active traders: ``creation_date``.
        * unknown traders: ``creation_date`` and ``month_year_week``.
        * closed markets: ``month_year_week`` from ``opening_datetime``.
        * both DAA frames: ``day`` parsed and localized to UTC, ``tx_date``,
          and ``seven_day_trailing_avg`` coerced to numeric, rounded to 2
          decimals.

    Returns:
        A 12-tuple in the same order as ``load_all_data``, with the trades
        frame replaced by the enriched traders frame.
    """

    def week_start_label(timestamps):
        """Format the start of each timestamp's weekly ("W") period."""
        week_start = timestamps.dt.to_period("W").dt.start_time
        return week_start.dt.strftime("%b-%d-%Y")

    def add_creation_date(frame):
        """Add the calendar date of ``creation_timestamp`` in place."""
        frame["creation_date"] = frame["creation_timestamp"].dt.date

    def normalise_daa(frame):
        """Parse the day column and clean the trailing average in place."""
        parsed_day = pd.to_datetime(frame["day"], format="%Y-%m-%d 00:00:00.000 UTC")
        frame["day"] = parsed_day.dt.tz_localize("UTC")
        frame["tx_date"] = pd.to_datetime(frame["day"]).dt.date
        trailing_avg = pd.to_numeric(frame["seven_day_trailing_avg"], errors="coerce")
        frame["seven_day_trailing_avg"] = trailing_avg.round(2)

    (
        trades,
        closed_markets,
        daily_info,
        unknown_traders,
        retention,
        active_traders,
        mech_calls,
        daa_qs,
        daa_pearl,
        weekly_roi_pearl,
        two_weeks_roi_pearl,
        weekly_metrics,
    ) = load_all_data()

    # Trades: normalise the timestamp, order chronologically, add the date.
    trades["creation_timestamp"] = trades["creation_timestamp"].dt.tz_convert("UTC")
    trades = trades.sort_values(by="creation_timestamp", ascending=True)
    add_creation_date(trades)

    # Attach to every trade the number of trades its trader made on that market.
    market_keys = ["trader_address", "title"]
    trades_per_market = (
        trades.groupby(market_keys)["roi"]
        .count()
        .reset_index(name="nr_trades_per_market")
    )
    traders_data = trades.merge(trades_per_market, on=market_keys)

    for frame in (daily_info, unknown_traders, active_traders):
        add_creation_date(frame)

    traders_data["trader_family"] = traders_data.apply(get_traders_family, axis=1)

    # Chronological order, then weekly labels.
    traders_data = traders_data.sort_values(by="creation_timestamp", ascending=True)
    unknown_traders = unknown_traders.sort_values(
        by="creation_timestamp", ascending=True
    )
    traders_data["month_year_week"] = week_start_label(
        traders_data["creation_timestamp"]
    )
    unknown_traders["month_year_week"] = week_start_label(
        unknown_traders["creation_timestamp"]
    )
    closed_markets["month_year_week"] = week_start_label(
        closed_markets["opening_datetime"]
    )

    # Daily-active-agent frames share the same clean-up.
    normalise_daa(daa_pearl)
    normalise_daa(daa_qs)

    return (
        traders_data,
        closed_markets,
        daily_info,
        unknown_traders,
        retention,
        active_traders,
        mech_calls,
        daa_qs,
        daa_pearl,
        weekly_roi_pearl,
        two_weeks_roi_pearl,
        weekly_metrics,
    )
|
|