rosacastillo committed
Commit e8f0e08
1 Parent(s): ea0955a

new weekly data and some amends

data/all_trades_profitability.parquet CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2dc010db5a3f4163f3d09274101a14cd63a860e64c92649c694c816f28799342
- size 6789999
+ oid sha256:533530b73aa7075ddb221e0820df23f77a87db90da0bbf9404ea1a98b80d9bc5
+ size 6389356
data/daily_info.parquet CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fed76273653048f900faca2d612b07f42be43d076238f0dac7f30e8882a1ec1b
- size 374565
+ oid sha256:d9f224f954dd108e164b12763dd628e05a5f17a94fd2422d9853f60f470a690d
+ size 697569
data/error_by_markets.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbe47e7cb744db4522161c6c121ac9393937d53ca372a2210952f7a469f59489
3
- size 12067
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13112e4809f1c2451419991c7737171fad6b3537f5d43d9e9e72d350b98f7083
3
+ size 12552
data/tools_accuracy.csv CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:818026934d2218b01f130770ffcb7563c80de0900be6721a55cd2499f9731889
- size 1100
+ oid sha256:09cf501daa10343c3e3a9a93fa81290e8399db2ec2b0550e722730bcd13a423e
+ size 1101
data/unknown_traders.parquet CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0164ef5ecaf966a5dcc677d96bba860c344f43cf53e237b6687b797502bd5e36
- size 184719
+ oid sha256:cd75418327e20282ad0793d5f092a362d6572d5d823b87da39ba874ea2938154
+ size 184739
data/winning_df.parquet CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fe676fcd7dde4b833f770dafa8e474a96bbe17fb16b9ceb160c03c2519ba72b4
- size 12980
+ oid sha256:e93e4e91ce125aa92dcfd206ad366c86b758aed598b2ce40403c22acd05f5e5c
+ size 13042
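
All six data files above are Git LFS pointers, so the diff only moves the sha256 oid and the byte size of each object. A minimal sketch (not part of the commit) for checking a pulled file against its pointer values:

```python
import hashlib
from pathlib import Path


def verify_lfs_object(data_path: Path, expected_oid: str, expected_size: int) -> bool:
    """Return True if the file matches the sha256 oid and size from its LFS pointer."""
    blob = data_path.read_bytes()
    return (
        len(blob) == expected_size
        and hashlib.sha256(blob).hexdigest() == expected_oid
    )


# e.g. with the new pointer values of data/all_trades_profitability.parquet:
# verify_lfs_object(
#     Path("data/all_trades_profitability.parquet"),
#     "533530b73aa7075ddb221e0820df23f77a87db90da0bbf9404ea1a98b80d9bc5",
#     6389356,
# )
```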
scripts/cleaning_old_info.py CHANGED
@@ -1,5 +1,5 @@
  import pandas as pd
- from utils import DATA_DIR
+ from utils import DATA_DIR, TMP_DIR
 
 
  def clean_old_data_from_parquet_files(cutoff_date: str):
@@ -63,6 +63,22 @@ def clean_old_data_from_parquet_files(cutoff_date: str):
      except Exception as e:
          print(f"Error cleaning unknown_traders file {e}")
 
+     # clean fpmmTrades.parquet
+     try:
+         fpmmTrades = pd.read_parquet(TMP_DIR / "fpmmTrades.parquet")
+
+         fpmmTrades["creation_timestamp"] = pd.to_datetime(
+             fpmmTrades["creation_timestamp"], utc=True
+         )
+
+         print(f"length before filtering {len(fpmmTrades)}")
+         fpmmTrades = fpmmTrades.loc[fpmmTrades["creation_timestamp"] > min_date_utc]
+         print(f"length after filtering {len(fpmmTrades)}")
+         fpmmTrades.to_parquet(TMP_DIR / "fpmmTrades.parquet", index=False)
+
+     except Exception as e:
+         print(f"Error cleaning fpmmTrades file {e}")
+
 
  if __name__ == "__main__":
      clean_old_data_from_parquet_files("2024-10-25")
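
The new block filters on `min_date_utc`, which is not shown in the hunk; presumably the function derives it from `cutoff_date` earlier in its body. A toy sketch of that pattern under that assumption:

```python
import pandas as pd

# Assumption: min_date_utc is derived from the cutoff_date argument.
cutoff_date = "2024-10-25"
min_date_utc = pd.Timestamp(cutoff_date, tz="UTC")

fpmmTrades = pd.DataFrame(
    {"creation_timestamp": ["2024-10-20T00:00:00Z", "2024-10-28T00:00:00Z"]}
)
fpmmTrades["creation_timestamp"] = pd.to_datetime(
    fpmmTrades["creation_timestamp"], utc=True
)
# Rows at or before the cutoff are dropped, mirroring the new cleaning step.
fpmmTrades = fpmmTrades.loc[fpmmTrades["creation_timestamp"] > min_date_utc]
print(len(fpmmTrades))  # 1
```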
scripts/daily_data.py CHANGED
@@ -21,15 +21,17 @@ def prepare_live_metrics(
      fpmmTrades = pd.read_parquet(TMP_DIR / trades_filename)
      tools = pd.read_parquet(TMP_DIR / tools_filename)
 
-     fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
-         lambda x: transform_to_datetime(x)
-     )
+     try:
+         fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
+             lambda x: transform_to_datetime(x)
+         )
+     except Exception as e:
+         print(f"Transformation not needed")
+
      print("Computing the estimated mech calls dataset")
      trader_mech_calls = compute_daily_mech_calls(fpmmTrades=fpmmTrades, tools=tools)
      print("Analysing trades...")
-     all_trades_df = analyse_all_traders(
-         fpmmTrades, tools, trader_mech_calls, daily_info=True
-     )
+     all_trades_df = analyse_all_traders(fpmmTrades, trader_mech_calls, daily_info=True)
 
      # staking label
      all_trades_df = label_trades_by_staking(all_trades_df)
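
The try/except treats a failing `transform_to_datetime` as a sign that `creationTimestamp` is already a datetime column. An equivalent explicit check could look like the sketch below (the `ensure_datetime` helper is hypothetical, not in the repo):

```python
import pandas as pd
from pandas.api.types import is_datetime64_any_dtype


def ensure_datetime(df: pd.DataFrame, column: str) -> pd.DataFrame:
    """Convert a string timestamp column only when it is not already datetime."""
    if not is_datetime64_any_dtype(df[column]):
        df[column] = pd.to_datetime(df[column], utc=True)
    return df


# fpmmTrades = ensure_datetime(fpmmTrades, "creationTimestamp")
```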
scripts/get_mech_info.py CHANGED
@@ -22,6 +22,7 @@ from mech_request_utils import (
      merge_json_files,
  )
  from web3_utils import updating_timestamps
+ from nr_mech_calls import transform_to_datetime
 
  SUBGRAPH_HEADERS = {
      "Accept": "application/json, multipart/mixed",
@@ -132,6 +133,20 @@ def update_fpmmTrades_parquet(trades_filename: str) -> pd.DataFrame:
          print(f"Error reading new trades parquet file {e}")
          return None
 
+     # ensure creationTimestamp compatibility
+     try:
+         new_trades_df["creationTimestamp"] = new_trades_df["creationTimestamp"].apply(
+             lambda x: transform_to_datetime(x)
+         )
+     except Exception as e:
+         print(f"Transformation not needed")
+     try:
+         old_trades_df["creationTimestamp"] = old_trades_df["creationTimestamp"].apply(
+             lambda x: transform_to_datetime(x)
+         )
+     except Exception as e:
+         print(f"Transformation not needed")
+
      # merge two dataframes
      merge_df = pd.concat([old_trades_df, new_trades_df], ignore_index=True)
      # avoid numpy objects
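
Normalising both frames before the concat matters because `pd.concat` silently mixes a datetime64 column with a string column, producing an object column that later timestamp comparisons choke on. A tiny sketch of the failure mode:

```python
import pandas as pd

old = pd.DataFrame({"creationTimestamp": pd.to_datetime(["2024-10-01"], utc=True)})
new = pd.DataFrame({"creationTimestamp": ["2024-10-28T00:00:00Z"]})  # still strings

merged = pd.concat([old, new], ignore_index=True)
print(merged["creationTimestamp"].dtype)  # object: datetimes and raw strings mixed
```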
scripts/markets.py CHANGED
@@ -153,8 +153,6 @@ def transform_fpmmTrades(df: pd.DataFrame) -> pd.DataFrame:
 
      # change creator to creator_address
      df.rename(columns={"creator": "trader_address"}, inplace=True)
-     print(df.head())
-     print(df.info())
      return df
 
 
scripts/mech_request_utils.py CHANGED
@@ -33,7 +33,12 @@ from tools import (
      request,
  )
  from tqdm import tqdm
- from web3_utils import FPMM_QS_CREATOR, FPMM_PEARL_CREATOR, IPFS_POLL_INTERVAL
+ from web3_utils import (
+     FPMM_QS_CREATOR,
+     FPMM_PEARL_CREATOR,
+     IPFS_POLL_INTERVAL,
+     SUBGRAPH_POLL_INTERVAL,
+ )
  from concurrent.futures import ThreadPoolExecutor, as_completed
  from utils import DATA_DIR, JSON_DATA_DIR, MECH_SUBGRAPH_URL, SUBGRAPH_API_KEY
 
@@ -139,7 +144,7 @@ def collect_all_mech_requests(from_block: int, to_block: int, filename: str) ->
          print(f"Error while getting the response: {e}")
 
      id_gt = items[-1]["id"]
-     time.sleep(IPFS_POLL_INTERVAL)
+     time.sleep(SUBGRAPH_POLL_INTERVAL)
      print(f"New execution for id_gt = {id_gt}")
      if len(duplicated_reqIds) > 0:
          print(f"Number of duplicated req Ids = {len(duplicated_reqIds)}")
@@ -202,7 +207,7 @@ def collect_all_mech_delivers(from_block: int, to_block: int, filename: str) ->
          # return None, None
 
      id_gt = items[-1]["id"]
-     time.sleep(IPFS_POLL_INTERVAL)
+     time.sleep(SUBGRAPH_POLL_INTERVAL)
      print(f"New execution for id_gt = {id_gt}")
      if len(duplicated_requestIds) > 0:
          print(f"Number of duplicated request id = {len(duplicated_requestIds)}")
scripts/nr_mech_calls.py CHANGED
@@ -144,7 +144,7 @@ def compute_daily_mech_calls(
      nr_traders = len(fpmmTrades["trader_address"].unique())
      fpmmTrades["creation_timestamp"] = pd.to_datetime(fpmmTrades["creationTimestamp"])
      fpmmTrades["creation_date"] = fpmmTrades["creation_timestamp"].dt.date
-     trades_df = trades_df.sort_values(by="creation_timestamp", ascending=True)
+     fpmmTrades = fpmmTrades.sort_values(by="creation_timestamp", ascending=True)
      tools["request_time"] = pd.to_datetime(tools["request_time"])
      tools["request_date"] = tools["request_time"].dt.date
      tools = tools.sort_values(by="request_time", ascending=True)
@@ -152,7 +152,7 @@ def compute_daily_mech_calls(
      for trader in tqdm(
          fpmmTrades["trader_address"].unique(),
          total=nr_traders,
-         desc="creating mech calls estimation based on timestamps",
+         desc="creating daily mech calls computation",
      ):
          # compute the mech calls estimations for each trader
          all_trades = fpmmTrades[fpmmTrades["trader_address"] == trader]
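
The first hunk is a genuine fix: `trades_df` is not defined inside `compute_daily_mech_calls`, so the old line either raised a `NameError` or sorted the wrong frame, and `fpmmTrades` itself was never put in chronological order. A toy illustration of what the corrected sort guarantees:

```python
import pandas as pd

fpmmTrades = pd.DataFrame(
    {
        "trader_address": ["0xabc", "0xabc"],
        "creation_timestamp": pd.to_datetime(["2024-10-28", "2024-10-26"]),
    }
)
# The per-trader loop assumes trades arrive in time order; sorting enforces it.
fpmmTrades = fpmmTrades.sort_values(by="creation_timestamp", ascending=True)
print(fpmmTrades["creation_timestamp"].is_monotonic_increasing)  # True
```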
scripts/profitability.py CHANGED
@@ -361,14 +361,19 @@ def run_profitability_analysis(
 
      tools = pd.read_parquet(TMP_DIR / "tools.parquet")
 
-     fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
-         lambda x: transform_to_datetime(x)
-     )
+     try:
+         fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
+             lambda x: transform_to_datetime(x)
+         )
+     except Exception as e:
+         print(f"Transformation not needed")
+
      print("Computing the estimated mech calls dataset")
      trade_mech_calls = compute_mech_calls_based_on_timestamps(
          fpmmTrades=fpmmTrades, tools=tools
      )
      trade_mech_calls.to_parquet(TMP_DIR / "trade_mech_calls.parquet")
+
      print(trade_mech_calls.total_mech_calls.describe())
      print("Analysing trades...")
      all_trades_df = analyse_all_traders(fpmmTrades, trade_mech_calls)
@@ -410,6 +415,12 @@ def run_profitability_analysis(
      unknown_traders_df, all_trades_df = create_unknown_traders_df(
          trades_df=all_trades_df
      )
+     # merge with previous unknown traders dataset
+     previous_unknown_traders = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")
+
+     unknown_traders_df = pd.concat(
+         [unknown_traders_df, previous_unknown_traders], ignore_index=True
+     )
      unknown_traders_df.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)
 
      # save to parquet
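
The unknown-traders dataset now accumulates across runs: each run concatenates the fresh rows onto the stored parquet before overwriting it. Nothing in the hunk deduplicates, so if the same trades can surface in consecutive runs, a dedup step might be worth adding; a sketch of that variation (treating a full-row duplicate as an already-recorded trade, which is an assumption):

```python
import pandas as pd


def append_unknown_traders(new_df: pd.DataFrame, path) -> pd.DataFrame:
    """Concat new rows onto the stored dataset, dropping exact duplicates."""
    previous = pd.read_parquet(path)
    merged = pd.concat([new_df, previous], ignore_index=True)
    merged = merged.drop_duplicates()  # assumption: full-row dup == same trade
    merged.to_parquet(path, index=False)
    return merged
```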
scripts/pull_data.py CHANGED
@@ -81,7 +81,7 @@ def only_new_weekly_analysis():
      rpc = RPC
      # Run markets ETL
      logging.info("Running markets ETL")
-     # mkt_etl(MARKETS_FILENAME)
+     mkt_etl(MARKETS_FILENAME)
      logging.info("Markets ETL completed")
 
      # Mech events ETL
@@ -121,9 +121,11 @@ def only_new_weekly_analysis():
      update_json_files()
 
      save_historical_data()
-
-     clean_old_data_from_parquet_files("2024-10-25")
-
+     try:
+         clean_old_data_from_parquet_files("2024-10-29")
+     except Exception as e:
+         print("Error cleaning the oldest information from parquet files")
+         print(f"reason = {e}")
      compute_tools_accuracy()
      compute_tools_based_datasets()
      # # move to tmp folder the new generated files
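
Two changes here: the markets ETL is re-enabled (it had been commented out), and the retention cutoff moves from 2024-10-25 to 2024-10-29, wrapped so a cleanup failure no longer aborts the rest of the weekly run. The same guard generalises to any non-critical stage; a sketch:

```python
def run_step(step, *args, step_name: str = "step"):
    """Run one pipeline stage, logging failures instead of aborting the run."""
    try:
        step(*args)
    except Exception as e:
        print(f"Error running {step_name}, continuing: {e}")


# run_step(clean_old_data_from_parquet_files, "2024-10-29",
#          step_name="clean_old_data_from_parquet_files")
```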
scripts/tools.py CHANGED
@@ -72,7 +72,9 @@ BACKOFF_FACTOR = 1
  STATUS_FORCELIST = [404, 500, 502, 503, 504]
  DEFAULT_FILENAME = "tools.parquet"
  ABI_ERROR = "The event signature did not match the provided ABI"
- HTTP_TIMEOUT = 10
+ # HTTP_TIMEOUT = 10
+ # Increasing when ipfs is slow
+ HTTP_TIMEOUT = 15
 
  IRRELEVANT_TOOLS = [
      "openai-text-davinci-002",
scripts/update_tools_accuracy.py CHANGED
@@ -29,8 +29,6 @@ def update_tools_accuracy(
          tools_non_error["currentAnswer"] == tools_non_error["vote"]
      ).astype(int)
      tools_non_error.columns = tools_non_error.columns.astype(str)
-     print("Tools dataset after filtering")
-     print(tools_non_error.head())
 
      wins = tools_non_error.groupby(["tool", "win"]).size().unstack().fillna(0)
      wins["tool_accuracy"] = (wins[1] / (wins[0] + wins[1])) * 100
@@ -39,8 +37,6 @@ def update_tools_accuracy(
      wins.columns = wins.columns.astype(str)
      wins = wins[["tool", "tool_accuracy", "total_requests"]]
 
-     print("Wins dataset")
-     print(wins.head())
      no_timeline_info = False
      try:
          timeline = tools_non_error.groupby(["tool"])["request_time"].agg(["min", "max"])
@@ -100,7 +96,6 @@ def compute_tools_accuracy():
      print("Computing accuracy of tools")
      print("Reading tools parquet file")
      tools = pd.read_parquet(TMP_DIR / "tools.parquet")
-     print(tools.head())
      # Computing tools accuracy information
      print("Computing tool accuracy information")
      # Check if the file exists
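
The hunks only strip debug prints; the accuracy computation itself is unchanged: pivot per-tool win counts, then `accuracy = wins / (wins + losses) * 100`. A toy run of the same groupby (with `win` holding 0/1 as in the function):

```python
import pandas as pd

tools_non_error = pd.DataFrame({"tool": ["a", "a", "a", "b"], "win": [1, 1, 0, 1]})

wins = tools_non_error.groupby(["tool", "win"]).size().unstack().fillna(0)
wins["tool_accuracy"] = (wins[1] / (wins[0] + wins[1])) * 100
print(wins["tool_accuracy"])  # a: 66.67, b: 100.0
```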
scripts/web3_utils.py CHANGED
@@ -33,11 +33,11 @@ LATEST_BLOCK: Optional[int] = None
  LATEST_BLOCK_NAME: BlockParams = "latest"
  BLOCK_DATA_NUMBER = "number"
  BLOCKS_CHUNK_SIZE = 10_000
- N_IPFS_RETRIES = 1
+ N_IPFS_RETRIES = 2
  N_RPC_RETRIES = 100
  RPC_POLL_INTERVAL = 0.05
- # IPFS_POLL_INTERVAL = 0.05 # low speed
- IPFS_POLL_INTERVAL = 0.2 # high speed
+ SUBGRAPH_POLL_INTERVAL = 0.05
+ IPFS_POLL_INTERVAL = 0.2 # 5 calls per second
  OMEN_SUBGRAPH_URL = Template(
      """https://gateway-arbitrum.network.thegraph.com/api/${subgraph_api_key}/subgraphs/id/9fUVQpFwzpdWS9bq5WkAnmKbNNcoBwatMR4yZq81pbbz"""
  )
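
The intervals encode request rates directly: 0.2 s between IPFS polls caps the client at 5 calls per second (hence the new comment), while the subgraph interval of 0.05 s allows 20. A one-liner to sanity-check the arithmetic:

```python
SUBGRAPH_POLL_INTERVAL = 0.05
IPFS_POLL_INTERVAL = 0.2

print(1 / IPFS_POLL_INTERVAL, 1 / SUBGRAPH_POLL_INTERVAL)  # 5.0 and 20.0 calls/s
```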