cyberosa
adding new dataset with weekly metrics for traders and updated ROI
1ab360a
import pandas as pd
from pathlib import Path
from datetime import datetime, timedelta
import gzip
import shutil
import os
from huggingface_hub import hf_hub_download
# Directory that contains this script file.
SCRIPTS_DIR = Path(__file__).parent
# Parent of the scripts directory (used here as the project root).
ROOT_DIR = SCRIPTS_DIR.parent
# "tmp" folder directly under ROOT_DIR; only the path is built here,
# the directory itself is not created by this assignment.
TMP_DIR = ROOT_DIR / "tmp"
def get_traders_family(row: pd.DataFrame) -> str:
    """Return the trader family label for a single trade record.

    The decision uses two attributes of ``row``:

    * ``staking`` equal to ``"non_agent"``  -> ``"non_agent"``
    * otherwise ``market_creator`` equal to ``"pearl"`` -> ``"pearl_agent"``
    * anything else -> ``"quickstart_agent"``

    NOTE(review): the parameter is annotated as ``pd.DataFrame``, but
    ``prepare_data`` calls this through ``DataFrame.apply(..., axis=1)``,
    i.e. with one row at a time.
    """
    # The staking check takes priority over the market creator.
    if row.staking == "non_agent":
        return "non_agent"
    # Everything that is not a pearl market is treated as quickstart.
    return "pearl_agent" if row.market_creator == "pearl" else "quickstart_agent"
def get_current_week(current_date: "datetime | None" = None) -> str:
    """Return the label of the Sunday that starts the current week.

    Args:
        current_date: Reference date. Defaults to ``datetime.now()`` (naive
            local time) when omitted, which is the original behaviour.

    Returns:
        The week start formatted as ``"%b-%d-%Y"`` (e.g. ``"May-12-2024"``).

    Note:
        ``weekday()`` is Monday=0 ... Sunday=6, so ``weekday() + 1`` is the
        distance to the most recent *earlier* Sunday. When the reference date
        is itself a Sunday the result is therefore the Sunday seven days
        before, which keeps this function exactly one week behind
        ``get_next_week``.
    """
    if current_date is None:
        current_date = datetime.now()
    days_since_sunday = current_date.weekday() + 1
    current_week_start = current_date - timedelta(days=days_since_sunday)
    return current_week_start.strftime("%b-%d-%Y")
def get_next_week(current_date: "datetime | None" = None) -> str:
    """Return the label of the Sunday that starts the next week.

    Args:
        current_date: Reference date. Defaults to ``datetime.now()`` (naive
            local time) when omitted, which is the original behaviour.

    Returns:
        The next week start formatted as ``"%b-%d-%Y"``.

    Note:
        ``weekday()`` is Monday=0 ... Sunday=6, so the offset
        ``7 - (weekday() + 1)`` ranges from 6 (Monday) down to 0 (Sunday).
        When the reference date is a Sunday the result is that same day.
    """
    if current_date is None:
        current_date = datetime.now()
    days_until_sunday = 7 - (current_date.weekday() + 1)
    next_week_start = current_date + timedelta(days=days_until_sunday)
    return next_week_start.strftime("%b-%d-%Y")
# Hugging Face dataset repository every file below is downloaded from.
_DATASET_REPO_ID = "valory/Olas-predict-dataset"


def _download_dataset_file(filename: str) -> Path:
    """Download ``filename`` from the dataset repo and return its local path."""
    return Path(
        hf_hub_download(
            repo_id=_DATASET_REPO_ID,
            filename=filename,
            repo_type="dataset",
        )
    )


def _read_parquet_from_hub(filename: str) -> pd.DataFrame:
    """Download a plain parquet file from the dataset repo and load it."""
    return pd.read_parquet(_download_dataset_file(filename))


def _read_gzipped_parquet_from_hub(
    filename: str, strip_from_name: str = ""
) -> pd.DataFrame:
    """Download a gzip-compressed parquet file, decompress it and load it.

    The decompressed file is written next to the downloaded archive. Only the
    *file name* is rewritten (trailing ``.gz`` dropped, ``strip_from_name``
    removed); the directory part of the path is left untouched so that folder
    names containing ``.gz`` or ``strip_from_name`` cannot be corrupted.
    """
    gz_path = _download_dataset_file(filename)
    # with_suffix("") removes only the final ".gz" suffix.
    target_name = gz_path.with_suffix("").name
    if strip_from_name:
        target_name = target_name.replace(strip_from_name, "")
    parquet_path = gz_path.with_name(target_name)
    with gzip.open(gz_path, "rb") as f_in, open(parquet_path, "wb") as f_out:
        shutil.copyfileobj(f_in, f_out)
    return pd.read_parquet(parquet_path)


def load_all_data():
    """Download every dataset file and return them as DataFrames.

    Returns:
        A 12-tuple of DataFrames, in this order:
        all trades profitability, closed markets div, daily info,
        unknown traders, retention activity, active traders,
        weekly mech calls, DAA quickstart, DAA pearl,
        weekly avg ROI (pearl agents), two-weeks avg ROI (pearl agents),
        traders weekly metrics.
    """
    # all trades profitability (gzip-compressed); the decompressed copy keeps
    # the original naming, i.e. the "all" prefix is removed from the file name.
    df1 = _read_gzipped_parquet_from_hub(
        "all_trades_profitability.parquet.gz", strip_from_name="all"
    )
    # closed_markets_div
    df2 = _read_parquet_from_hub("closed_markets_div.parquet")
    # daily_info
    df3 = _read_parquet_from_hub("daily_info.parquet")
    # unknown traders
    df4 = _read_parquet_from_hub("unknown_traders.parquet")
    # retention activity (gzip-compressed)
    df5 = _read_gzipped_parquet_from_hub("retention_activity.parquet.gz")
    # active traders
    df6 = _read_parquet_from_hub("active_traders.parquet")
    # weekly mech calls
    df7 = _read_parquet_from_hub("weekly_mech_calls.parquet")
    # daa for quickstart and pearl
    df8 = _read_parquet_from_hub("latest_result_DAA_QS.parquet")
    df9 = _read_parquet_from_hub("latest_result_DAA_Pearl.parquet")
    # weekly and two-weeks average ROI for pearl agents
    df10 = _read_parquet_from_hub("weekly_avg_roi_pearl_agents.parquet")
    df11 = _read_parquet_from_hub("two_weeks_avg_roi_pearl_agents.parquet")
    # weekly metrics per trader
    df12 = _read_parquet_from_hub("traders_weekly_metrics.parquet")
    return df1, df2, df3, df4, df5, df6, df7, df8, df9, df10, df11, df12
def _week_start_label(timestamps: pd.Series) -> pd.Series:
    """Format the start of each timestamp's weekly period as ``%b-%d-%Y``."""
    weekly_periods = timestamps.dt.to_period("W")
    return weekly_periods.dt.start_time.dt.strftime("%b-%d-%Y")


def _prepare_daa_frame(daa_df: pd.DataFrame) -> None:
    """Parse the ``day`` column and clean the trailing average, in place."""
    parsed_day = pd.to_datetime(daa_df["day"], format="%Y-%m-%d 00:00:00.000 UTC")
    daa_df["day"] = parsed_day.dt.tz_localize("UTC")
    daa_df["tx_date"] = pd.to_datetime(daa_df["day"]).dt.date
    # Values that cannot be parsed as numbers become NaN before rounding.
    trailing_avg = pd.to_numeric(daa_df["seven_day_trailing_avg"], errors="coerce")
    daa_df["seven_day_trailing_avg"] = trailing_avg.round(2)


def prepare_data():
    """Load all datasets and add the derived columns.

    Steps performed on the frames returned by ``load_all_data``:

    * trades: convert ``creation_timestamp`` to UTC, sort by it, add
      ``creation_date``, ``nr_trades_per_market`` (count of ``roi`` per
      trader address and market title), ``trader_family`` and
      ``month_year_week``;
    * daily info / unknown traders / active traders: add ``creation_date``;
    * unknown traders and closed markets: add ``month_year_week``;
    * both DAA frames: parse ``day``, add ``tx_date`` and round
      ``seven_day_trailing_avg`` to two decimals.

    Returns:
        A 12-tuple in the same order as ``load_all_data``, with the enriched
        trades frame in first position.
    """
    loaded = load_all_data()
    (
        trades,
        closed_markets,
        daily_info,
        unknown_traders,
        retention_df,
        active_traders,
        all_mech_calls,
        daa_qs_df,
        daa_pearl_df,
        weekly_avg_roi_pearl_agents,
        two_weeks_avg_roi_pearl_agents,
        traders_weekly_metrics_df,
    ) = loaded

    # Normalise the trade timestamps and order the trades chronologically.
    trades["creation_timestamp"] = trades["creation_timestamp"].dt.tz_convert("UTC")
    trades = trades.sort_values(by="creation_timestamp", ascending=True)
    trades["creation_date"] = trades["creation_timestamp"].dt.date

    # nr-trades variable: number of trades per trader and market.
    market_keys = ["trader_address", "title"]
    trades_per_market = trades.groupby(market_keys)["roi"].count()
    trades_per_market = trades_per_market.reset_index(name="nr_trades_per_market")
    traders_data = pd.merge(trades, trades_per_market, on=market_keys)

    for frame in (daily_info, unknown_traders, active_traders):
        frame["creation_date"] = frame["creation_timestamp"].dt.date

    # adding the trader family column
    traders_data["trader_family"] = traders_data.apply(get_traders_family, axis=1)

    traders_data = traders_data.sort_values(by="creation_timestamp", ascending=True)
    unknown_traders = unknown_traders.sort_values(
        by="creation_timestamp", ascending=True
    )

    # Weekly labels used to group rows by the week they belong to.
    traders_data["month_year_week"] = _week_start_label(
        traders_data["creation_timestamp"]
    )
    unknown_traders["month_year_week"] = _week_start_label(
        unknown_traders["creation_timestamp"]
    )
    closed_markets["month_year_week"] = _week_start_label(
        closed_markets["opening_datetime"]
    )

    # prepare the daa dataframes
    _prepare_daa_frame(daa_pearl_df)
    _prepare_daa_frame(daa_qs_df)

    return (
        traders_data,
        closed_markets,
        daily_info,
        unknown_traders,
        retention_df,
        active_traders,
        all_mech_calls,
        daa_qs_df,
        daa_pearl_df,
        weekly_avg_roi_pearl_agents,
        two_weeks_avg_roi_pearl_agents,
        traders_weekly_metrics_df,
    )