"""Download the ``valory/Olas-predict-dataset`` parquet files and prepare them.

``load_all_data`` fetches every parquet file from the Hugging Face dataset
repository and ``prepare_data`` adds the derived date / week / family columns
on top of the raw DataFrames.
"""

import gzip
import os
import shutil
from datetime import datetime, timedelta
from pathlib import Path

import pandas as pd
from huggingface_hub import hf_hub_download

SCRIPTS_DIR = Path(__file__).parent
ROOT_DIR = SCRIPTS_DIR.parent
TMP_DIR = ROOT_DIR / "tmp"

# Hugging Face dataset repository every file is downloaded from.
_DATASET_REPO_ID = "valory/Olas-predict-dataset"
# strftime format shared by all week labels produced in this module.
_WEEK_LABEL_FORMAT = "%b-%d-%Y"
# Raw format of the "day" column in the DAA parquet files.
_DAA_DAY_FORMAT = "%Y-%m-%d 00:00:00.000 UTC"


def get_traders_family(row: pd.Series) -> str:
    """Classify one trade row into a trader family.

    Args:
        row: A row exposing ``staking`` and ``market_creator`` attributes
            (a ``pd.Series`` when called through ``DataFrame.apply(axis=1)``).

    Returns:
        ``"non_agent"`` when ``row.staking == "non_agent"``,
        ``"pearl_agent"`` when ``row.market_creator == "pearl"``,
        otherwise ``"quickstart_agent"``.
    """
    if row.staking == "non_agent":
        return "non_agent"
    if row.market_creator == "pearl":
        return "pearl_agent"
    # Everything else is treated as quickstart.
    return "quickstart_agent"


def get_current_week() -> str:
    """Return the label of the Sunday that opens the current week.

    The date is computed from the naive local ``datetime.now()``.
    ``weekday()`` is 0 for Monday and 6 for Sunday, so the offset is between
    1 and 7 days: the result is always the most recent Sunday strictly before
    today (when today is a Sunday, the previous Sunday is returned).

    Returns:
        The date formatted as ``"%b-%d-%Y"``.
    """
    current_date = datetime.now()
    days_since_sunday = current_date.weekday() + 1
    current_week_start = current_date - timedelta(days=days_since_sunday)
    return current_week_start.strftime(_WEEK_LABEL_FORMAT)


def get_next_week() -> str:
    """Return the label of the Sunday that opens the next week.

    The date is computed from the naive local ``datetime.now()``. The offset
    is between 0 and 6 days: the result is the first Sunday on or after today
    (when today is a Sunday, today is returned).

    Returns:
        The date formatted as ``"%b-%d-%Y"``.
    """
    current_date = datetime.now()
    days_until_sunday = 7 - (current_date.weekday() + 1)
    next_week_start = current_date + timedelta(days=days_until_sunday)
    return next_week_start.strftime(_WEEK_LABEL_FORMAT)


def _download_dataset_file(filename: str) -> str:
    """Download ``filename`` from the dataset repo and return its local path."""
    return hf_hub_download(
        repo_id=_DATASET_REPO_ID,
        filename=filename,
        repo_type="dataset",
    )


def _read_parquet(filename: str) -> pd.DataFrame:
    """Download a plain parquet file from the dataset repo and load it."""
    return pd.read_parquet(_download_dataset_file(filename))


def _read_gzipped_parquet(filename: str, decompressed_name: str) -> pd.DataFrame:
    """Download a gzipped parquet file, decompress it next to itself and load it.

    Args:
        filename: Name of the ``.parquet.gz`` file in the dataset repo.
        decompressed_name: File name (not a path) for the decompressed copy,
            written in the same directory as the downloaded archive.

    Returns:
        The DataFrame read from the decompressed parquet file.
    """
    gz_path = Path(_download_dataset_file(filename))
    # Only the final path component is changed. Running str.replace over the
    # whole path would also rewrite matching directory names (for example a
    # home directory containing "all") and point at a non-existent folder.
    parquet_path = gz_path.with_name(decompressed_name)
    with gzip.open(gz_path, "rb") as f_in, open(parquet_path, "wb") as f_out:
        shutil.copyfileobj(f_in, f_out)
    return pd.read_parquet(parquet_path)


def load_all_data():
    """Download and load every DataFrame of the dataset.

    Files are downloaded one after another, in the order of the returned
    tuple.

    Returns:
        A 12-tuple of DataFrames, in this order: all trades profitability,
        closed markets div, daily info, unknown traders, retention activity,
        active traders, weekly mech calls, latest DAA for quickstart,
        latest DAA for pearl, weekly average ROI of pearl agents,
        two-weeks average ROI of pearl agents, traders weekly metrics.
    """
    # all trades profitability (gzipped); the decompressed copy keeps the
    # historical "_trades_profitability.parquet" name.
    df1 = _read_gzipped_parquet(
        "all_trades_profitability.parquet.gz", "_trades_profitability.parquet"
    )
    # closed_markets_div
    df2 = _read_parquet("closed_markets_div.parquet")
    # daily_info
    df3 = _read_parquet("daily_info.parquet")
    # unknown traders
    df4 = _read_parquet("unknown_traders.parquet")
    # retention activity (gzipped)
    df5 = _read_gzipped_parquet(
        "retention_activity.parquet.gz", "retention_activity.parquet"
    )
    # active traders
    df6 = _read_parquet("active_traders.parquet")
    # weekly mech calls
    df7 = _read_parquet("weekly_mech_calls.parquet")
    # daa for quickstart and pearl
    df8 = _read_parquet("latest_result_DAA_QS.parquet")
    df9 = _read_parquet("latest_result_DAA_Pearl.parquet")
    # weekly and two-weeks average roi of pearl agents
    df10 = _read_parquet("weekly_avg_roi_pearl_agents.parquet")
    df11 = _read_parquet("two_weeks_avg_roi_pearl_agents.parquet")
    # traders weekly metrics
    df12 = _read_parquet("traders_weekly_metrics.parquet")
    return df1, df2, df3, df4, df5, df6, df7, df8, df9, df10, df11, df12


def _week_label(timestamps: pd.Series) -> pd.Series:
    """Map each timestamp to the formatted start of its pandas "W" period."""
    week_start = timestamps.dt.to_period("W").dt.start_time
    return week_start.dt.strftime(_WEEK_LABEL_FORMAT)


def _prepare_daa(daa_df: pd.DataFrame) -> pd.DataFrame:
    """Normalise a DAA DataFrame in place and return it.

    Parses ``day`` into a UTC-localized datetime, derives ``tx_date`` from it
    and converts ``seven_day_trailing_avg`` to a number rounded to two
    decimals (unparseable values become NaN).
    """
    parsed_day = pd.to_datetime(daa_df["day"], format=_DAA_DAY_FORMAT)
    daa_df["day"] = parsed_day.dt.tz_localize("UTC")
    daa_df["tx_date"] = daa_df["day"].dt.date
    trailing_avg = pd.to_numeric(daa_df["seven_day_trailing_avg"], errors="coerce")
    daa_df["seven_day_trailing_avg"] = trailing_avg.round(2)
    return daa_df


def prepare_data():
    """Load all the DataFrames and add the derived columns.

    On top of ``load_all_data`` this:
      * converts the trades ``creation_timestamp`` to UTC and sorts by it;
      * adds ``creation_date`` to trades, daily info, unknown traders and
        active traders;
      * adds ``nr_trades_per_market`` (count of non-null ``roi`` per
        ``trader_address`` and ``title``) and ``trader_family`` to trades;
      * adds the ``month_year_week`` label to trades, unknown traders and
        closed markets (the latter based on ``opening_datetime``);
      * normalises the two DAA DataFrames.

    Returns:
        A 12-tuple in the same order as ``load_all_data``, with the trades
        DataFrame replaced by the enriched traders data.
    """
    (
        all_trades,
        closed_markets,
        daily_info,
        unknown_traders,
        retention_df,
        active_traders,
        all_mech_calls,
        daa_qs_df,
        daa_pearl_df,
        weekly_avg_roi_pearl_agents,
        two_weeks_avg_roi_pearl_agents,
        traders_weekly_metrics_df,
    ) = load_all_data()

    all_trades["creation_timestamp"] = all_trades["creation_timestamp"].dt.tz_convert(
        "UTC"
    )
    all_trades = all_trades.sort_values(by="creation_timestamp", ascending=True)
    all_trades["creation_date"] = all_trades["creation_timestamp"].dt.date

    # nr-trades variable
    volume_trades_per_trader_and_market = (
        all_trades.groupby(["trader_address", "title"])["roi"]
        .count()
        .reset_index(name="nr_trades_per_market")
    )
    traders_data = pd.merge(
        all_trades, volume_trades_per_trader_and_market, on=["trader_address", "title"]
    )

    daily_info["creation_date"] = daily_info["creation_timestamp"].dt.date
    unknown_traders["creation_date"] = unknown_traders["creation_timestamp"].dt.date
    active_traders["creation_date"] = active_traders["creation_timestamp"].dt.date

    # adding the trader family column
    traders_data["trader_family"] = traders_data.apply(get_traders_family, axis=1)

    traders_data = traders_data.sort_values(by="creation_timestamp", ascending=True)
    unknown_traders = unknown_traders.sort_values(
        by="creation_timestamp", ascending=True
    )

    traders_data["month_year_week"] = _week_label(traders_data["creation_timestamp"])
    unknown_traders["month_year_week"] = _week_label(
        unknown_traders["creation_timestamp"]
    )
    closed_markets["month_year_week"] = _week_label(closed_markets["opening_datetime"])

    # prepare the daa dataframes
    daa_pearl_df = _prepare_daa(daa_pearl_df)
    daa_qs_df = _prepare_daa(daa_qs_df)

    return (
        traders_data,
        closed_markets,
        daily_info,
        unknown_traders,
        retention_df,
        active_traders,
        all_mech_calls,
        daa_qs_df,
        daa_pearl_df,
        weekly_avg_roi_pearl_agents,
        two_weeks_avg_roi_pearl_agents,
        traders_weekly_metrics_df,
    )