# cyberosa
# fixing datetime error:
# 279fac4
import pandas as pd
from pathlib import Path
from datetime import datetime, timedelta
import gzip
import shutil
import os
from huggingface_hub import hf_hub_download
# Directory that contains this script file.
SCRIPTS_DIR = Path(__file__).parent
# Parent directory of SCRIPTS_DIR.
ROOT_DIR = SCRIPTS_DIR.parent
# "tmp" directory directly under ROOT_DIR.
# NOTE(review): not referenced by the functions visible in this file — confirm it is still needed.
TMP_DIR = ROOT_DIR / "tmp"
def get_traders_family(row: pd.Series) -> str:
    """Classify a single trade row into its trader family.

    The ``staking`` check takes precedence over the ``market_creator`` check.

    Args:
        row: One row exposing ``staking`` and ``market_creator`` attributes,
            such as the rows passed by ``DataFrame.apply(..., axis=1)``.

    Returns:
        ``"non_agent"`` when ``row.staking`` equals ``"non_agent"``,
        ``"pearl_agent"`` when ``row.market_creator`` equals ``"pearl"``,
        and ``"quickstart_agent"`` in every other case.
    """
    if row.staking == "non_agent":
        return "non_agent"
    if row.market_creator == "pearl":
        return "pearl_agent"
    # Anything that is neither a non-agent nor a pearl market is quickstart.
    return "quickstart_agent"
def get_current_week(current_date: "datetime | None" = None):
    """Return the label of the Sunday that opens the current week.

    The result is ``current_date`` moved back by ``weekday() + 1`` days.
    For Monday through Saturday that is the most recent Sunday. When
    ``current_date`` itself is a Sunday (``weekday() == 6``) the result is
    the Sunday seven days earlier, not the same day.

    Args:
        current_date: Reference date. Defaults to ``datetime.now()`` so that
            existing no-argument callers keep their behaviour; passing a
            value makes the result deterministic.

    Returns:
        The week-start date formatted as ``"%b-%d-%Y"`` (e.g. ``"Dec-31-2023"``).
    """
    if current_date is None:
        current_date = datetime.now()
    # weekday() is 0 for Monday, so "+ 1" steps back past Monday onto Sunday.
    days_since_week_start = current_date.weekday() + 1
    current_week_start = current_date - timedelta(days=days_since_week_start)
    return current_week_start.strftime("%b-%d-%Y")
def get_next_week(current_date: "datetime | None" = None):
    """Return the label of the Sunday that opens the next week.

    The result is ``current_date`` moved forward by
    ``7 - (weekday() + 1)`` days. For Monday through Saturday that is the
    upcoming Sunday. When ``current_date`` itself is a Sunday the offset is
    zero, so the same day is returned.

    Args:
        current_date: Reference date. Defaults to ``datetime.now()`` so that
            existing no-argument callers keep their behaviour; passing a
            value makes the result deterministic.

    Returns:
        The next week-start date formatted as ``"%b-%d-%Y"``
        (e.g. ``"Jan-07-2024"``).
    """
    if current_date is None:
        current_date = datetime.now()
    # weekday() is 0 for Monday; the offset is how many days remain until Sunday.
    days_until_next_week = 7 - (current_date.weekday() + 1)
    next_week_start = current_date + timedelta(days=days_until_next_week)
    return next_week_start.strftime("%b-%d-%Y")
# Hugging Face dataset repository that hosts every file loaded by load_all_data.
_DATASET_REPO_ID = "valory/Olas-predict-dataset"


def _download_dataset_file(filename: str) -> str:
    """Download *filename* from the dataset repository and return its local path."""
    return hf_hub_download(
        repo_id=_DATASET_REPO_ID,
        filename=filename,
        repo_type="dataset",
    )


def _read_parquet_file(filename: str) -> pd.DataFrame:
    """Download an uncompressed parquet file from the dataset repository and load it."""
    return pd.read_parquet(_download_dataset_file(filename))


def _read_gzipped_parquet_file(
    filename: str, strip_from_name: str = ""
) -> pd.DataFrame:
    """Download a gzip-compressed parquet file, decompress it and load it.

    The decompressed copy is written next to the downloaded archive and is
    left on disk afterwards.

    Args:
        filename: Name of the ``.parquet.gz`` file in the dataset repository.
        strip_from_name: Optional substring removed from the decompressed
            file's name (never from its directory).

    Returns:
        The DataFrame read from the decompressed parquet file.
    """
    gz_path = Path(_download_dataset_file(filename))
    # Only rewrite the file name: doing the replacements on the full path
    # would corrupt any parent directory that happens to contain ".gz" or
    # the stripped substring.
    target_name = gz_path.name.replace(".gz", "")
    if strip_from_name:
        target_name = target_name.replace(strip_from_name, "")
    parquet_path = gz_path.with_name(target_name)

    with gzip.open(gz_path, "rb") as f_in, open(parquet_path, "wb") as f_out:
        shutil.copyfileobj(f_in, f_out)
    return pd.read_parquet(parquet_path)


def load_all_data():
    """Download every dataset file from the Hugging Face hub and load it.

    Files are fetched with ``hf_hub_download`` from the
    ``valory/Olas-predict-dataset`` dataset repository. The two ``.gz``
    archives are decompressed beside the downloaded file before being read.

    Returns:
        A 12-tuple of DataFrames, in this order: all trades profitability,
        closed markets divergence, daily info, unknown traders, retention
        activity, active traders, weekly mech calls, DAA quickstart,
        DAA pearl, weekly average ROI of pearl agents, two-weeks average ROI
        of pearl agents, traders weekly metrics.
    """
    # The decompressed trades file keeps its historical name, i.e. the
    # archive name without ".gz" and without "all".
    all_trades = _read_gzipped_parquet_file(
        "all_trades_profitability.parquet.gz", strip_from_name="all"
    )
    closed_markets = _read_parquet_file("closed_markets_div.parquet")
    daily_info = _read_parquet_file("daily_info.parquet")
    unknown_traders = _read_parquet_file("unknown_traders.parquet")
    retention_activity = _read_gzipped_parquet_file("retention_activity.parquet.gz")
    active_traders = _read_parquet_file("active_traders.parquet")
    weekly_mech_calls = _read_parquet_file("weekly_mech_calls.parquet")
    # Daily active agents for quickstart and pearl.
    daa_qs = _read_parquet_file("latest_result_DAA_QS.parquet")
    daa_pearl = _read_parquet_file("latest_result_DAA_Pearl.parquet")
    weekly_avg_roi_pearl_agents = _read_parquet_file(
        "weekly_avg_roi_pearl_agents.parquet"
    )
    two_weeks_avg_roi_pearl_agents = _read_parquet_file(
        "two_weeks_avg_roi_pearl_agents.parquet"
    )
    traders_weekly_metrics = _read_parquet_file("traders_weekly_metrics.parquet")

    return (
        all_trades,
        closed_markets,
        daily_info,
        unknown_traders,
        retention_activity,
        active_traders,
        weekly_mech_calls,
        daa_qs,
        daa_pearl,
        weekly_avg_roi_pearl_agents,
        two_weeks_avg_roi_pearl_agents,
        traders_weekly_metrics,
    )
def _week_start_label(timestamps: pd.Series) -> pd.Series:
    """Return the start of each timestamp's weekly period as a ``%b-%d-%Y`` string."""
    return timestamps.dt.to_period("W").dt.start_time.dt.strftime("%b-%d-%Y")


def _prepare_daa(daa_df: pd.DataFrame) -> pd.DataFrame:
    """Normalise a daily-active-agents frame in place and return it.

    Parses ``day`` with the ``"%Y-%m-%d 00:00:00.000 UTC"`` format, localises
    it to UTC, derives ``tx_date`` from it, and converts
    ``seven_day_trailing_avg`` to a number rounded to two decimals
    (unparseable values become NaN).
    """
    daa_df["day"] = pd.to_datetime(
        daa_df["day"], format="%Y-%m-%d 00:00:00.000 UTC"
    ).dt.tz_localize("UTC")
    daa_df["tx_date"] = daa_df["day"].dt.date
    daa_df["seven_day_trailing_avg"] = pd.to_numeric(
        daa_df["seven_day_trailing_avg"], errors="coerce"
    ).round(2)
    return daa_df


def prepare_data():
    """Load every dataset and add the derived columns used downstream.

    Calls ``load_all_data`` and then:

    * converts the trades' ``creation_timestamp`` to UTC datetimes and adds
      ``creation_date``, ``nr_trades_per_market``, ``trader_family`` and
      ``month_year_week``;
    * adds ``creation_date`` to daily info, unknown traders and active traders;
    * adds ``month_year_week`` to unknown traders and closed markets;
    * normalises the two daily-active-agents frames.

    Returns:
        A 12-tuple in this order: traders data, closed markets, daily info,
        unknown traders, retention data, active traders, all mech calls,
        DAA quickstart, DAA pearl, weekly average ROI of pearl agents,
        two-weeks average ROI of pearl agents, traders weekly metrics.
    """
    (
        all_trades,
        closed_markets,
        daily_info,
        unknown_traders,
        retention_df,
        active_traders,
        all_mech_calls,
        daa_qs_df,
        daa_pearl_df,
        weekly_avg_roi_pearl_agents,
        two_weeks_avg_roi_pearl_agents,
        traders_weekly_metrics_df,
    ) = load_all_data()

    # Coerce and normalise to UTC in one step. Calling .dt.tz_convert before
    # coercion raises on tz-naive or non-datetime columns; with utc=True,
    # tz-aware values are converted and tz-naive ones are treated as UTC.
    all_trades["creation_timestamp"] = pd.to_datetime(
        all_trades["creation_timestamp"], errors="coerce", utc=True
    )
    all_trades = all_trades.sort_values(by="creation_timestamp", ascending=True)
    all_trades["creation_date"] = all_trades["creation_timestamp"].dt.date

    # Number of trades per (trader, market) pair.
    volume_trades_per_trader_and_market = (
        all_trades.groupby(["trader_address", "title"])["roi"]
        .count()
        .reset_index(name="nr_trades_per_market")
    )
    traders_data = pd.merge(
        all_trades, volume_trades_per_trader_and_market, on=["trader_address", "title"]
    )

    daily_info["creation_timestamp"] = pd.to_datetime(
        daily_info["creation_timestamp"], errors="coerce"
    )
    daily_info["creation_date"] = daily_info["creation_timestamp"].dt.date
    unknown_traders["creation_date"] = unknown_traders["creation_timestamp"].dt.date
    active_traders["creation_date"] = active_traders["creation_timestamp"].dt.date

    # Add the trader family column.
    traders_data["trader_family"] = traders_data.apply(get_traders_family, axis=1)

    traders_data = traders_data.sort_values(by="creation_timestamp", ascending=True)
    unknown_traders = unknown_traders.sort_values(
        by="creation_timestamp", ascending=True
    )

    # Weekly bucket label shared by the three frames.
    traders_data["month_year_week"] = _week_start_label(
        traders_data["creation_timestamp"]
    )
    unknown_traders["month_year_week"] = _week_start_label(
        unknown_traders["creation_timestamp"]
    )
    closed_markets["month_year_week"] = _week_start_label(
        closed_markets["opening_datetime"]
    )

    # Prepare the daily-active-agents frames.
    daa_pearl_df = _prepare_daa(daa_pearl_df)
    daa_qs_df = _prepare_daa(daa_qs_df)

    return (
        traders_data,
        closed_markets,
        daily_info,
        unknown_traders,
        retention_df,
        active_traders,
        all_mech_calls,
        daa_qs_df,
        daa_pearl_df,
        weekly_avg_roi_pearl_agents,
        two_weeks_avg_roi_pearl_agents,
        traders_weekly_metrics_df,
    )