rosacastillo committed
Commit b60f995 · 1 Parent(s): 6992ec1

weekly data and some fixes

data/all_trades_profitability.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:08d3f27f13f20022a8aa176e3f84559a38c001d03753fd13101eae15ab4a1123
-size 6500230
+oid sha256:04134fbaceed13c1c02eeb952134df58ec787afddcba0e09b69b7ea5ae8693cd
+size 6590780
data/daily_info.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:689d0f6abf1886fb0b089db0294e07c551485005fabbaf73ea36083d6388ce5f
-size 78026
+oid sha256:880e896031e8e87ddc4e9fc952941d7bbe5196b86e56ebdf003e572182db6b76
+size 1050038
data/error_by_markets.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7832acf3c67797c32225d4c253d651ffcbd4aba4bb19354ea00ad0c1d7200cc1
-size 13489
+oid sha256:5b0f69c6df66a53208bd47167d39db8ef7ec965a47a1a90fe429815cc44cbad9
+size 14168
data/invalid_trades.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6156d839803a0c09606450ead26fb3b31ef6ef30bf3507ccbd0dc0399ad366f7
-size 152273
+oid sha256:f29e5a59d7a2c60ef8408cddf58785f601a472105a99bd7339a45de8389fa36f
+size 159829
data/tools_accuracy.csv CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab13e55237b5f9bee0012eab8976b0ea3ef1518f0071a081d9052f97061315a7
-size 1100
+oid sha256:a521cd7735fbd9aecd1fce18836b08dc03cc58dca1f52add0b60ec2bbb00f722
+size 1099
data/unknown_traders.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3620eaba76778060f41059fb2b6ff6e92a6000eedfd9a9119b703f84cdda11ff
-size 194084
+oid sha256:5c237838416f8339b145719e5b370df1d9763099f9e5fa3e75d4d69053e5d311
+size 200722
data/winning_df.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1ad71600af38478d3dc275bf7ad0f3cd3cd46d44d1bf54f06bb6f9d9540cc041
-size 13714
+oid sha256:6c31991027458844d3c8e72da29bb9acd7bcef8ad31ae7c95a59ee168a34ba58
+size 14407
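
All seven data files above are Git LFS pointers, so the weekly refresh only changes the object hash and byte size in the diff; the parquet/CSV payloads live in LFS storage. Assuming the LFS objects have been pulled, the refreshed datasets load as usual — a minimal sketch, with paths as in the repo:

import pandas as pd

# Load the weekly-refreshed datasets referenced by the LFS pointers above
all_trades = pd.read_parquet("data/all_trades_profitability.parquet")
daily_info = pd.read_parquet("data/daily_info.parquet")
tools_accuracy = pd.read_csv("data/tools_accuracy.csv")

print(len(all_trades), len(daily_info), len(tools_accuracy))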
notebooks/mech_calls_analysis.ipynb CHANGED
@@ -2,26 +2,132 @@
  "cells": [
  {
  "cell_type": "code",
- "execution_count": 1,
+ "execution_count": 7,
  "metadata": {},
  "outputs": [],
  "source": [
  "import pandas as pd\n",
  "import matplotlib.pyplot as plt\n",
  "import seaborn as sns\n",
- "import gc\n",
+ "from datetime import datetime, timezone\n",
  "sns.set_style(\"darkgrid\")"
  ]
  },
  {
  "cell_type": "code",
- "execution_count": 2,
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "new_trades = pd.read_parquet('../tmp/new_fpmmTrades.parquet')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def transform_to_datetime(x):\n",
+ "    return datetime.fromtimestamp(int(x), tz=timezone.utc)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "new_trades[\"creationTimestamp\"] = new_trades[\"creationTimestamp\"].apply(\n",
+ "    lambda x: transform_to_datetime(x)\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['collateralAmount', 'collateralAmountUSD', 'collateralToken',\n",
+ "       'creationTimestamp', 'trader_address', 'feeAmount', 'id',\n",
+ "       'oldOutcomeTokenMarginalPrice', 'outcomeIndex',\n",
+ "       'outcomeTokenMarginalPrice', 'outcomeTokensTraded', 'title',\n",
+ "       'transactionHash', 'type', 'market_creator',\n",
+ "       'fpmm.answerFinalizedTimestamp', 'fpmm.arbitrationOccurred',\n",
+ "       'fpmm.currentAnswer', 'fpmm.id', 'fpmm.isPendingArbitration',\n",
+ "       'fpmm.openingTimestamp', 'fpmm.outcomes', 'fpmm.title',\n",
+ "       'fpmm.condition.id'],\n",
+ "      dtype='object')"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "new_trades.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Timestamp('2025-01-07 09:01:05+0000', tz='UTC')"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "min(new_trades.creationTimestamp)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
  "metadata": {},
  "outputs": [],
  "source": [
  "all_trades = pd.read_parquet('../data/all_trades_profitability.parquet')"
  ]
  },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Timestamp('2025-01-07 04:43:00+0000', tz='UTC')"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "max(all_trades.creation_timestamp)"
+ ]
+ },
  {
  "cell_type": "code",
  "execution_count": 3,
scripts/daily_data.py CHANGED
@@ -10,6 +10,7 @@ from nr_mech_calls import (
     compute_daily_mech_calls,
     transform_to_datetime,
 )
+from markets import check_current_week_data
 
 logging.basicConfig(level=logging.INFO)
 
@@ -21,12 +22,15 @@ def prepare_live_metrics(
     fpmmTrades = pd.read_parquet(TMP_DIR / trades_filename)
     tools = pd.read_parquet(TMP_DIR / tools_filename)
 
+    # TODO if monday data of the week is missing in new_fpmmTrades then take it from the general file
     try:
         fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
             lambda x: transform_to_datetime(x)
         )
     except Exception as e:
         print(f"Transformation not needed")
+    # check missing data from Monday
+    fpmmTrades = check_current_week_data(fpmmTrades)
 
     print("Computing the estimated mech calls dataset")
     trader_mech_calls = compute_daily_mech_calls(fpmmTrades=fpmmTrades, tools=tools)
scripts/get_mech_info.py CHANGED
@@ -7,11 +7,11 @@ from utils import (
     DATA_DIR,
     TMP_DIR,
     NETWORK_SUBGRAPH_URL,
+    transform_to_datetime,
 )
 import requests
 import pandas as pd
 import numpy as np
-from gnosis_timestamps import compute_request_time
 from mech_request_utils import (
     collect_all_mech_delivers,
     collect_all_mech_requests,
@@ -21,8 +21,6 @@ from mech_request_utils import (
     get_ipfs_data,
     merge_json_files,
 )
-from web3_utils import updating_timestamps
-from nr_mech_calls import transform_to_datetime
 
 SUBGRAPH_HEADERS = {
     "Accept": "application/json, multipart/mixed",
@@ -311,7 +309,7 @@ def get_mech_info_last_60_days() -> dict[str, Any]:
 
 
 @measure_execution_time
-def get_mech_events_since_last_run():
+def get_mech_events_since_last_run(logger):
     """Function to download only the new events since the last execution."""
 
     # Read the latest date from stored data
@@ -371,7 +369,7 @@ def get_mech_events_since_last_run():
     )
 
     # Add ipfs contents
-    get_ipfs_data("new_merged_requests.json", "new_tools_info.json")
+    get_ipfs_data("new_merged_requests.json", "new_tools_info.json", logger)
     return latest_timestamp
scripts/markets.py CHANGED
@@ -19,13 +19,13 @@
 
 import functools
 import warnings
-from string import Template
+from datetime import datetime, timedelta
 from typing import Optional, Generator, Callable
 import pandas as pd
 import requests
 from tqdm import tqdm
 from typing import List, Dict
-from utils import SUBGRAPH_API_KEY, DATA_DIR, TMP_DIR
+from utils import SUBGRAPH_API_KEY, DATA_DIR, TMP_DIR, transform_to_datetime
 from web3_utils import (
     FPMM_QS_CREATOR,
     FPMM_PEARL_CREATOR,
@@ -299,14 +299,19 @@ def etl(filename: Optional[str] = None) -> pd.DataFrame:
     return fpmms
 
 
-def add_market_creator(tools: pd.DataFrame) -> None:
-    # Check if fpmmTrades.parquet is in the same directory
+def read_global_trades_file() -> pd.DataFrame:
     try:
         trades_filename = "fpmmTrades.parquet"
         fpmms_trades = pd.read_parquet(TMP_DIR / trades_filename)
     except FileNotFoundError:
         print("Error: fpmmTrades.parquet not found. No market creator added")
         return
+    return fpmms_trades
+
+
+def add_market_creator(tools: pd.DataFrame) -> None:
+    # Check if fpmmTrades.parquet is in the same directory
+    fpmms_trades = read_global_trades_file()
     tools["market_creator"] = ""
     # traverse the list of traders
     tools_no_market_creator = 0
@@ -346,5 +351,35 @@ def fpmmTrades_etl(rpc: str, trades_filename: str, from_timestamp: str) -> None:
     return
 
 
+def check_current_week_data(trades_df: pd.DataFrame) -> pd.DataFrame:
+    """Check that all of the current week's data is present; if not, add the missing data from the global trades file"""
+    # Get current date
+    now = datetime.now()
+
+    # Get start of the current week (Monday)
+    start_of_week = now - timedelta(days=now.weekday())
+    start_of_week = start_of_week.replace(hour=0, minute=0, second=0, microsecond=0)
+    print(f"start of the week = {start_of_week}")
+
+    trades_df["creation_timestamp"] = pd.to_datetime(trades_df["creationTimestamp"])
+    trades_df["creation_date"] = trades_df["creation_timestamp"].dt.date
+    trades_df["creation_date"] = pd.to_datetime(trades_df["creation_date"])
+    # Check dataframe
+    min_date = min(trades_df.creation_date)
+    if min_date > start_of_week:
+        # missing data of current week in the trades file
+        fpmms_trades = read_global_trades_file()
+        # get missing data
+        missing_data = fpmms_trades[
+            (fpmms_trades["creation_date"] >= start_of_week)
+            & (fpmms_trades["creation_date"] < min_date)
+        ]
+        merge_df = pd.concat([trades_df, missing_data], ignore_index=True)
+        merge_df.drop_duplicates("id", keep="last", inplace=True)
+        return merge_df
+    # no update needed
+    return trades_df
+
+
 if __name__ == "__main__":
     etl("all_fpmms.parquet")
scripts/mech_request_utils.py CHANGED
@@ -40,14 +40,20 @@ from web3_utils import (
     SUBGRAPH_POLL_INTERVAL,
 )
 from concurrent.futures import ThreadPoolExecutor, as_completed
-from utils import DATA_DIR, JSON_DATA_DIR, MECH_SUBGRAPH_URL, SUBGRAPH_API_KEY
+from utils import (
+    DATA_DIR,
+    JSON_DATA_DIR,
+    MECH_SUBGRAPH_URL,
+    SUBGRAPH_API_KEY,
+    IPFS_ADDRESS,
+)
 
 NUM_WORKERS = 10
 BLOCKS_CHUNK_SIZE = 10000
 TEXT_ALIGNMENT = 30
 MINIMUM_WRITE_FILE_DELAY_SECONDS = 20
 MECH_FROM_BLOCK_RANGE = 50000
-IPFS_ADDRESS = "https://gateway.autonolas.tech/ipfs/"
+
 last_write_time = 0.0
 
 REQUESTS_QUERY_FILTER = """
@@ -501,14 +507,14 @@ def merge_requests_delivers(
     return
 
 
-def get_ipfs_data(input_filename: str, output_filename: str):
+def get_ipfs_data(input_filename: str, output_filename: str, logger):
     with open(JSON_DATA_DIR / input_filename, "r") as file:
         mech_requests = json.load(file)
 
     total_keys_to_traverse = list(mech_requests.keys())
     updated_mech_requests = dict()
     session = create_session()
-    print("UPDATING IPFS CONTENTS OF REQUESTS")
+    logger.info("UPDATING IPFS CONTENTS OF REQUESTS")
     # requests
     nr_errors = 0
     with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
@@ -533,11 +539,11 @@ def get_ipfs_data(input_filename: str, output_filename: str):
         updated_mech_requests.update(partial_dict)
 
     save_json_file(updated_mech_requests, output_filename)
-    print(f"NUMBER OF MECH REQUEST IPFS ERRORS={nr_errors}")
+    logger.info(f"NUMBER OF MECH REQUEST IPFS ERRORS={nr_errors}")
 
     # delivers
     nr_deliver_errors = 0
-    print("UPDATING IPFS CONTENTS OF DELIVERS")
+    logger.info("UPDATING IPFS CONTENTS OF DELIVERS")
     total_keys_to_traverse = list(updated_mech_requests.keys())
     final_tools_content = {}
     with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
@@ -562,7 +568,7 @@ def get_ipfs_data(input_filename: str, output_filename: str):
         final_tools_content.update(partial_dict)
 
     save_json_file(final_tools_content, output_filename)
-    print(f"NUMBER OF MECH DELIVERS IPFS ERRORS={nr_deliver_errors}")
+    logger.info(f"NUMBER OF MECH DELIVERS IPFS ERRORS={nr_deliver_errors}")
 
 
 def only_delivers_loop():
scripts/nr_mech_calls.py CHANGED
@@ -1,17 +1,13 @@
 import pandas as pd
-from utils import DATA_DIR, DEFAULT_MECH_FEE, TMP_DIR
+from utils import DATA_DIR, DEFAULT_MECH_FEE, TMP_DIR, transform_to_datetime
 from tqdm import tqdm
-from datetime import datetime, timezone
+
 from typing import Dict, Any
 from collections import defaultdict
 from tools import IRRELEVANT_TOOLS
 import re
 
 
-def transform_to_datetime(x):
-    return datetime.fromtimestamp(int(x), tz=timezone.utc)
-
-
 def update_roi(row: pd.DataFrame) -> float:
     new_value = row.net_earnings / (
         row.collateral_amount
scripts/pull_data.py CHANGED
@@ -26,7 +26,12 @@ from cloud_storage import load_historical_file
 from tools_metrics import compute_tools_based_datasets
 
 
-logging.basicConfig(level=logging.INFO)
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+)
+logger = logging.getLogger(__name__)
 
 
 def add_current_answer(tools_filename: str):
@@ -88,7 +93,7 @@ def only_new_weekly_analysis():
     # Mech events ETL
     logging.info("Generating the mech json files")
     # get only new data
-    latest_timestamp = get_mech_events_since_last_run()
+    latest_timestamp = get_mech_events_since_last_run(logger)
     if latest_timestamp == None:
         print("Error while getting the mech events")
         return
@@ -127,7 +132,7 @@ def only_new_weekly_analysis():
 
     save_historical_data()
     try:
-        clean_old_data_from_parquet_files("2024-11-06")
+        clean_old_data_from_parquet_files("2024-11-12")
     except Exception as e:
         print("Error cleaning the oldest information from parquet files")
         print(f"reason = {e}")
scripts/tools.py CHANGED
@@ -27,6 +27,7 @@ from typing import (
 )
 import pandas as pd
 import requests
+from datetime import datetime
 from gnosis_timestamps import transform_timestamp_to_datetime
 from requests.adapters import HTTPAdapter
 from tqdm import tqdm
@@ -96,11 +97,17 @@ NUM_WORKERS = 10
 GET_CONTENTS_BATCH_SIZE = 1000
 
 
+class TimestampedRetry(Retry):
+    def increment(self, *args, **kwargs):
+        print(f"Retry attempt at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+        return super().increment(*args, **kwargs)
+
+
 def create_session() -> requests.Session:
     """Create a session with a retry strategy."""
     session = requests.Session()
-    retry_strategy = Retry(
-        total=N_IPFS_RETRIES + 1,
+    retry_strategy = TimestampedRetry(
+        total=N_IPFS_RETRIES,
         backoff_factor=BACKOFF_FACTOR,
         status_forcelist=STATUS_FORCELIST,
     )
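
Together with the N_IPFS_RETRIES bump in scripts/web3_utils.py below, the subclass prints a wall-clock timestamp every time urllib3 schedules another attempt. A self-contained sketch of how the strategy mounts onto a session; the BACKOFF_FACTOR and STATUS_FORCELIST values here are assumptions, not taken from this diff:

import requests
from datetime import datetime
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

N_IPFS_RETRIES = 4                        # scripts/web3_utils.py after this commit
BACKOFF_FACTOR = 1                        # assumed value
STATUS_FORCELIST = [500, 502, 503, 504]   # assumed value

class TimestampedRetry(Retry):
    def increment(self, *args, **kwargs):
        # Log when each retry is scheduled, then defer to urllib3's bookkeeping
        print(f"Retry attempt at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        return super().increment(*args, **kwargs)

session = requests.Session()
adapter = HTTPAdapter(
    max_retries=TimestampedRetry(
        total=N_IPFS_RETRIES,
        backoff_factor=BACKOFF_FACTOR,
        status_forcelist=STATUS_FORCELIST,
    )
)
session.mount("https://", adapter)
session.mount("http://", adapter)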
scripts/utils.py CHANGED
@@ -7,6 +7,7 @@ import pandas as pd
 import gc
 import re
 from dataclasses import dataclass
+from datetime import datetime, timezone
 from pathlib import Path
 from enum import Enum
 from string import Template
@@ -29,11 +30,12 @@ REQUEST_SENDER = "sender"
 PROMPT_FIELD = "prompt"
 HTTP = "http://"
 HTTPS = HTTP[:4] + "s" + HTTP[4:]
-IPFS_ADDRESS = f"{HTTPS}gateway.autonolas.tech/ipfs/"
 FORMAT_UPDATE_BLOCK_NUMBER = 30411638
 INVALID_ANSWER_HEX = (
     "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
 )
+OLD_IPFS_ADDRESS = "https://gateway.autonolas.tech/ipfs/"
+IPFS_ADDRESS = "https://gateway.gcp.autonolas.tech/ipfs/"
 
 INC_TOOLS = [
     "prediction-online",
@@ -241,6 +243,10 @@ EVENT_TO_MECH_STRUCT = {
 }
 
 
+def transform_to_datetime(x):
+    return datetime.fromtimestamp(int(x), tz=timezone.utc)
+
+
 def measure_execution_time(func):
     def wrapper(*args, **kwargs):
         start_time = time.time()
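
transform_to_datetime now lives in scripts/utils.py and is imported by the other modules instead of being redefined locally. A quick usage check; the epoch value below is back-derived from the notebook output above and shown only for illustration:

from datetime import datetime, timezone

def transform_to_datetime(x):
    # int-like string or int of epoch seconds -> timezone-aware UTC datetime
    return datetime.fromtimestamp(int(x), tz=timezone.utc)

print(transform_to_datetime("1736240465"))  # 2025-01-07 09:01:05+00:00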
scripts/web3_utils.py CHANGED
@@ -33,7 +33,7 @@ LATEST_BLOCK: Optional[int] = None
 LATEST_BLOCK_NAME: BlockParams = "latest"
 BLOCK_DATA_NUMBER = "number"
 BLOCKS_CHUNK_SIZE = 10_000
-N_IPFS_RETRIES = 2
+N_IPFS_RETRIES = 4
 N_RPC_RETRIES = 100
 RPC_POLL_INTERVAL = 0.05
 SUBGRAPH_POLL_INTERVAL = 0.05