rosacastillo committed
Commit e8f0e08
1 Parent(s): ea0955a

new weekly data and some amends

data/all_trades_profitability.parquet CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2dc010db5a3f4163f3d09274101a14cd63a860e64c92649c694c816f28799342
- size 6789999
+ oid sha256:533530b73aa7075ddb221e0820df23f77a87db90da0bbf9404ea1a98b80d9bc5
+ size 6389356
data/daily_info.parquet CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fed76273653048f900faca2d612b07f42be43d076238f0dac7f30e8882a1ec1b
- size 374565
+ oid sha256:d9f224f954dd108e164b12763dd628e05a5f17a94fd2422d9853f60f470a690d
+ size 697569
data/error_by_markets.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbe47e7cb744db4522161c6c121ac9393937d53ca372a2210952f7a469f59489
3
- size 12067
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13112e4809f1c2451419991c7737171fad6b3537f5d43d9e9e72d350b98f7083
3
+ size 12552
data/tools_accuracy.csv CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:818026934d2218b01f130770ffcb7563c80de0900be6721a55cd2499f9731889
- size 1100
+ oid sha256:09cf501daa10343c3e3a9a93fa81290e8399db2ec2b0550e722730bcd13a423e
+ size 1101
data/unknown_traders.parquet CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0164ef5ecaf966a5dcc677d96bba860c344f43cf53e237b6687b797502bd5e36
- size 184719
+ oid sha256:cd75418327e20282ad0793d5f092a362d6572d5d823b87da39ba874ea2938154
+ size 184739
data/winning_df.parquet CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fe676fcd7dde4b833f770dafa8e474a96bbe17fb16b9ceb160c03c2519ba72b4
- size 12980
+ oid sha256:e93e4e91ce125aa92dcfd206ad366c86b758aed598b2ce40403c22acd05f5e5c
+ size 13042
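
All six data files above are Git LFS pointers, so the diff only moves the sha256 oid and the byte size of each object. A minimal sketch (not part of the commit) for checking a pulled file against its pointer values:

```python
import hashlib
from pathlib import Path


def verify_lfs_object(data_path: Path, expected_oid: str, expected_size: int) -> bool:
    """Return True if the file matches the sha256 oid and size from its LFS pointer."""
    blob = data_path.read_bytes()
    return (
        len(blob) == expected_size
        and hashlib.sha256(blob).hexdigest() == expected_oid
    )


# e.g. with the new pointer values of data/all_trades_profitability.parquet:
# verify_lfs_object(
#     Path("data/all_trades_profitability.parquet"),
#     "533530b73aa7075ddb221e0820df23f77a87db90da0bbf9404ea1a98b80d9bc5",
#     6389356,
# )
```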
scripts/cleaning_old_info.py CHANGED
@@ -1,5 +1,5 @@
  import pandas as pd
- from utils import DATA_DIR
+ from utils import DATA_DIR, TMP_DIR
 
 
  def clean_old_data_from_parquet_files(cutoff_date: str):
@@ -63,6 +63,22 @@ def clean_old_data_from_parquet_files(cutoff_date: str):
      except Exception as e:
          print(f"Error cleaning unknown_traders file {e}")
 
+     # clean fpmmTrades.parquet
+     try:
+         fpmmTrades = pd.read_parquet(TMP_DIR / "fpmmTrades.parquet")
+
+         fpmmTrades["creation_timestamp"] = pd.to_datetime(
+             fpmmTrades["creation_timestamp"], utc=True
+         )
+
+         print(f"length before filtering {len(fpmmTrades)}")
+         fpmmTrades = fpmmTrades.loc[fpmmTrades["creation_timestamp"] > min_date_utc]
+         print(f"length after filtering {len(fpmmTrades)}")
+         fpmmTrades.to_parquet(TMP_DIR / "fpmmTrades.parquet", index=False)
+
+     except Exception as e:
+         print(f"Error cleaning fpmmTrades file {e}")
+
 
  if __name__ == "__main__":
      clean_old_data_from_parquet_files("2024-10-25")
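
The new block filters on `min_date_utc`, which is not shown in the hunk; presumably the function derives it from `cutoff_date` earlier in its body. A toy sketch of that pattern under that assumption:

```python
import pandas as pd

# Assumption: min_date_utc is derived from the cutoff_date argument.
cutoff_date = "2024-10-25"
min_date_utc = pd.Timestamp(cutoff_date, tz="UTC")

fpmmTrades = pd.DataFrame(
    {"creation_timestamp": ["2024-10-20T00:00:00Z", "2024-10-28T00:00:00Z"]}
)
fpmmTrades["creation_timestamp"] = pd.to_datetime(
    fpmmTrades["creation_timestamp"], utc=True
)
# Rows at or before the cutoff are dropped, mirroring the new cleaning step.
fpmmTrades = fpmmTrades.loc[fpmmTrades["creation_timestamp"] > min_date_utc]
print(len(fpmmTrades))  # 1
```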
scripts/daily_data.py CHANGED
@@ -21,15 +21,17 @@ def prepare_live_metrics(
      fpmmTrades = pd.read_parquet(TMP_DIR / trades_filename)
      tools = pd.read_parquet(TMP_DIR / tools_filename)
 
-     fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
-         lambda x: transform_to_datetime(x)
-     )
+     try:
+         fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
+             lambda x: transform_to_datetime(x)
+         )
+     except Exception as e:
+         print(f"Transformation not needed")
+
      print("Computing the estimated mech calls dataset")
      trader_mech_calls = compute_daily_mech_calls(fpmmTrades=fpmmTrades, tools=tools)
      print("Analysing trades...")
-     all_trades_df = analyse_all_traders(
-         fpmmTrades, tools, trader_mech_calls, daily_info=True
-     )
+     all_trades_df = analyse_all_traders(fpmmTrades, trader_mech_calls, daily_info=True)
 
      # staking label
      all_trades_df = label_trades_by_staking(all_trades_df)
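
The try/except treats a failing `transform_to_datetime` as a sign that `creationTimestamp` is already a datetime column. An equivalent explicit check could look like the sketch below (the `ensure_datetime` helper is hypothetical, not in the repo):

```python
import pandas as pd
from pandas.api.types import is_datetime64_any_dtype


def ensure_datetime(df: pd.DataFrame, column: str) -> pd.DataFrame:
    """Convert a string timestamp column only when it is not already datetime."""
    if not is_datetime64_any_dtype(df[column]):
        df[column] = pd.to_datetime(df[column], utc=True)
    return df


# fpmmTrades = ensure_datetime(fpmmTrades, "creationTimestamp")
```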
scripts/get_mech_info.py CHANGED
@@ -22,6 +22,7 @@ from mech_request_utils import (
      merge_json_files,
  )
  from web3_utils import updating_timestamps
+ from nr_mech_calls import transform_to_datetime
 
  SUBGRAPH_HEADERS = {
      "Accept": "application/json, multipart/mixed",
@@ -132,6 +133,20 @@ def update_fpmmTrades_parquet(trades_filename: str) -> pd.DataFrame:
          print(f"Error reading new trades parquet file {e}")
          return None
 
+     # ensure creationTimestamp compatibility
+     try:
+         new_trades_df["creationTimestamp"] = new_trades_df["creationTimestamp"].apply(
+             lambda x: transform_to_datetime(x)
+         )
+     except Exception as e:
+         print(f"Transformation not needed")
+     try:
+         old_trades_df["creationTimestamp"] = old_trades_df["creationTimestamp"].apply(
+             lambda x: transform_to_datetime(x)
+         )
+     except Exception as e:
+         print(f"Transformation not needed")
+
      # merge two dataframes
      merge_df = pd.concat([old_trades_df, new_trades_df], ignore_index=True)
      # avoid numpy objects
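
Normalising both frames before the concat matters because `pd.concat` silently mixes a datetime64 column with a string column, producing an object column that later timestamp comparisons choke on. A tiny sketch of the failure mode:

```python
import pandas as pd

old = pd.DataFrame({"creationTimestamp": pd.to_datetime(["2024-10-01"], utc=True)})
new = pd.DataFrame({"creationTimestamp": ["2024-10-28T00:00:00Z"]})  # still strings

merged = pd.concat([old, new], ignore_index=True)
print(merged["creationTimestamp"].dtype)  # object: datetimes and raw strings mixed
```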
scripts/markets.py CHANGED
@@ -153,8 +153,6 @@ def transform_fpmmTrades(df: pd.DataFrame) -> pd.DataFrame:
 
      # change creator to creator_address
      df.rename(columns={"creator": "trader_address"}, inplace=True)
-     print(df.head())
-     print(df.info())
      return df
 
 
scripts/mech_request_utils.py CHANGED
@@ -33,7 +33,12 @@ from tools import (
      request,
  )
  from tqdm import tqdm
- from web3_utils import FPMM_QS_CREATOR, FPMM_PEARL_CREATOR, IPFS_POLL_INTERVAL
+ from web3_utils import (
+     FPMM_QS_CREATOR,
+     FPMM_PEARL_CREATOR,
+     IPFS_POLL_INTERVAL,
+     SUBGRAPH_POLL_INTERVAL,
+ )
  from concurrent.futures import ThreadPoolExecutor, as_completed
  from utils import DATA_DIR, JSON_DATA_DIR, MECH_SUBGRAPH_URL, SUBGRAPH_API_KEY
 
@@ -139,7 +144,7 @@ def collect_all_mech_requests(from_block: int, to_block: int, filename: str) ->
          print(f"Error while getting the response: {e}")
 
      id_gt = items[-1]["id"]
-     time.sleep(IPFS_POLL_INTERVAL)
+     time.sleep(SUBGRAPH_POLL_INTERVAL)
      print(f"New execution for id_gt = {id_gt}")
      if len(duplicated_reqIds) > 0:
          print(f"Number of duplicated req Ids = {len(duplicated_reqIds)}")
@@ -202,7 +207,7 @@ def collect_all_mech_delivers(from_block: int, to_block: int, filename: str) ->
          # return None, None
 
      id_gt = items[-1]["id"]
-     time.sleep(IPFS_POLL_INTERVAL)
+     time.sleep(SUBGRAPH_POLL_INTERVAL)
      print(f"New execution for id_gt = {id_gt}")
      if len(duplicated_requestIds) > 0:
          print(f"Number of duplicated request id = {len(duplicated_requestIds)}")
scripts/nr_mech_calls.py CHANGED
@@ -144,7 +144,7 @@ def compute_daily_mech_calls(
      nr_traders = len(fpmmTrades["trader_address"].unique())
      fpmmTrades["creation_timestamp"] = pd.to_datetime(fpmmTrades["creationTimestamp"])
      fpmmTrades["creation_date"] = fpmmTrades["creation_timestamp"].dt.date
-     trades_df = trades_df.sort_values(by="creation_timestamp", ascending=True)
+     fpmmTrades = fpmmTrades.sort_values(by="creation_timestamp", ascending=True)
      tools["request_time"] = pd.to_datetime(tools["request_time"])
      tools["request_date"] = tools["request_time"].dt.date
      tools = tools.sort_values(by="request_time", ascending=True)
@@ -152,7 +152,7 @@ def compute_daily_mech_calls(
      for trader in tqdm(
          fpmmTrades["trader_address"].unique(),
          total=nr_traders,
-         desc="creating mech calls estimation based on timestamps",
+         desc="creating daily mech calls computation",
      ):
          # compute the mech calls estimations for each trader
          all_trades = fpmmTrades[fpmmTrades["trader_address"] == trader]
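
The first hunk is a genuine fix: `trades_df` is not defined inside `compute_daily_mech_calls`, so the old line either raised a `NameError` or sorted the wrong frame, and `fpmmTrades` itself was never put in chronological order. A toy illustration of what the corrected sort guarantees:

```python
import pandas as pd

fpmmTrades = pd.DataFrame(
    {
        "trader_address": ["0xabc", "0xabc"],
        "creation_timestamp": pd.to_datetime(["2024-10-28", "2024-10-26"]),
    }
)
# The per-trader loop assumes trades arrive in time order; sorting enforces it.
fpmmTrades = fpmmTrades.sort_values(by="creation_timestamp", ascending=True)
print(fpmmTrades["creation_timestamp"].is_monotonic_increasing)  # True
```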
scripts/profitability.py CHANGED
@@ -361,14 +361,19 @@ def run_profitability_analysis(
 
      tools = pd.read_parquet(TMP_DIR / "tools.parquet")
 
-     fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
-         lambda x: transform_to_datetime(x)
-     )
+     try:
+         fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
+             lambda x: transform_to_datetime(x)
+         )
+     except Exception as e:
+         print(f"Transformation not needed")
+
      print("Computing the estimated mech calls dataset")
      trade_mech_calls = compute_mech_calls_based_on_timestamps(
          fpmmTrades=fpmmTrades, tools=tools
      )
      trade_mech_calls.to_parquet(TMP_DIR / "trade_mech_calls.parquet")
+
      print(trade_mech_calls.total_mech_calls.describe())
      print("Analysing trades...")
      all_trades_df = analyse_all_traders(fpmmTrades, trade_mech_calls)
@@ -410,6 +415,12 @@ def run_profitability_analysis(
      unknown_traders_df, all_trades_df = create_unknown_traders_df(
          trades_df=all_trades_df
      )
+     # merge with previous unknown traders dataset
+     previous_unknown_traders = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")
+
+     unknown_traders_df = pd.concat(
+         [unknown_traders_df, previous_unknown_traders], ignore_index=True
+     )
      unknown_traders_df.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)
 
      # save to parquet
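
The unknown-traders dataset now accumulates across runs: each run concatenates the fresh rows onto the stored parquet before overwriting it. Nothing in the hunk deduplicates, so if the same trades can surface in consecutive runs, a dedup step might be worth adding; a sketch of that variation (treating a full-row duplicate as an already-recorded trade, which is an assumption):

```python
import pandas as pd


def append_unknown_traders(new_df: pd.DataFrame, path) -> pd.DataFrame:
    """Concat new rows onto the stored dataset, dropping exact duplicates."""
    previous = pd.read_parquet(path)
    merged = pd.concat([new_df, previous], ignore_index=True)
    merged = merged.drop_duplicates()  # assumption: full-row dup == same trade
    merged.to_parquet(path, index=False)
    return merged
```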
scripts/pull_data.py CHANGED
@@ -81,7 +81,7 @@ def only_new_weekly_analysis():
      rpc = RPC
      # Run markets ETL
      logging.info("Running markets ETL")
-     # mkt_etl(MARKETS_FILENAME)
+     mkt_etl(MARKETS_FILENAME)
      logging.info("Markets ETL completed")
 
      # Mech events ETL
@@ -121,9 +121,11 @@ def only_new_weekly_analysis():
      update_json_files()
 
      save_historical_data()
-
-     clean_old_data_from_parquet_files("2024-10-25")
-
+     try:
+         clean_old_data_from_parquet_files("2024-10-29")
+     except Exception as e:
+         print("Error cleaning the oldest information from parquet files")
+         print(f"reason = {e}")
      compute_tools_accuracy()
      compute_tools_based_datasets()
      # # move to tmp folder the new generated files
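
Two changes here: the markets ETL is re-enabled (it had been commented out), and the retention cutoff moves from 2024-10-25 to 2024-10-29, wrapped so a cleanup failure no longer aborts the rest of the weekly run. The same guard generalises to any non-critical stage; a sketch:

```python
def run_step(step, *args, step_name: str = "step"):
    """Run one pipeline stage, logging failures instead of aborting the run."""
    try:
        step(*args)
    except Exception as e:
        print(f"Error running {step_name}, continuing: {e}")


# run_step(clean_old_data_from_parquet_files, "2024-10-29",
#          step_name="clean_old_data_from_parquet_files")
```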
scripts/tools.py CHANGED
@@ -72,7 +72,9 @@ BACKOFF_FACTOR = 1
  STATUS_FORCELIST = [404, 500, 502, 503, 504]
  DEFAULT_FILENAME = "tools.parquet"
  ABI_ERROR = "The event signature did not match the provided ABI"
- HTTP_TIMEOUT = 10
+ # HTTP_TIMEOUT = 10
+ # Increasing when ipfs is slow
+ HTTP_TIMEOUT = 15
 
  IRRELEVANT_TOOLS = [
      "openai-text-davinci-002",
scripts/update_tools_accuracy.py CHANGED
@@ -29,8 +29,6 @@ def update_tools_accuracy(
          tools_non_error["currentAnswer"] == tools_non_error["vote"]
      ).astype(int)
      tools_non_error.columns = tools_non_error.columns.astype(str)
-     print("Tools dataset after filtering")
-     print(tools_non_error.head())
 
      wins = tools_non_error.groupby(["tool", "win"]).size().unstack().fillna(0)
      wins["tool_accuracy"] = (wins[1] / (wins[0] + wins[1])) * 100
@@ -39,8 +37,6 @@ def update_tools_accuracy(
      wins.columns = wins.columns.astype(str)
      wins = wins[["tool", "tool_accuracy", "total_requests"]]
 
-     print("Wins dataset")
-     print(wins.head())
      no_timeline_info = False
      try:
          timeline = tools_non_error.groupby(["tool"])["request_time"].agg(["min", "max"])
@@ -100,7 +96,6 @@ def compute_tools_accuracy():
      print("Computing accuracy of tools")
      print("Reading tools parquet file")
      tools = pd.read_parquet(TMP_DIR / "tools.parquet")
-     print(tools.head())
      # Computing tools accuracy information
      print("Computing tool accuracy information")
      # Check if the file exists
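
The hunks only strip debug prints; the accuracy computation itself is unchanged: pivot per-tool win counts, then `accuracy = wins / (wins + losses) * 100`. A toy run of the same groupby (with `win` holding 0/1 as in the function):

```python
import pandas as pd

tools_non_error = pd.DataFrame({"tool": ["a", "a", "a", "b"], "win": [1, 1, 0, 1]})

wins = tools_non_error.groupby(["tool", "win"]).size().unstack().fillna(0)
wins["tool_accuracy"] = (wins[1] / (wins[0] + wins[1])) * 100
print(wins["tool_accuracy"])  # a: 66.67, b: 100.0
```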
scripts/web3_utils.py CHANGED
@@ -33,11 +33,11 @@ LATEST_BLOCK: Optional[int] = None
  LATEST_BLOCK_NAME: BlockParams = "latest"
  BLOCK_DATA_NUMBER = "number"
  BLOCKS_CHUNK_SIZE = 10_000
- N_IPFS_RETRIES = 1
+ N_IPFS_RETRIES = 2
  N_RPC_RETRIES = 100
  RPC_POLL_INTERVAL = 0.05
- # IPFS_POLL_INTERVAL = 0.05 # low speed
- IPFS_POLL_INTERVAL = 0.2 # high speed
+ SUBGRAPH_POLL_INTERVAL = 0.05
+ IPFS_POLL_INTERVAL = 0.2 # 5 calls per second
  OMEN_SUBGRAPH_URL = Template(
      """https://gateway-arbitrum.network.thegraph.com/api/${subgraph_api_key}/subgraphs/id/9fUVQpFwzpdWS9bq5WkAnmKbNNcoBwatMR4yZq81pbbz"""
  )
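
The intervals encode request rates directly: 0.2 s between IPFS polls caps the client at 5 calls per second (hence the new comment), while the subgraph interval of 0.05 s allows 20. A one-liner to sanity-check the arithmetic:

```python
SUBGRAPH_POLL_INTERVAL = 0.05
IPFS_POLL_INTERVAL = 0.2

print(1 / IPFS_POLL_INTERVAL, 1 / SUBGRAPH_POLL_INTERVAL)  # 5.0 and 20.0 calls/s
```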