rosacastillo committed
Commit b60f995 · 1 Parent(s): 6992ec1

weekly data and some fixes
Browse files
- data/all_trades_profitability.parquet +2 -2
- data/daily_info.parquet +2 -2
- data/error_by_markets.parquet +2 -2
- data/invalid_trades.parquet +2 -2
- data/tools_accuracy.csv +2 -2
- data/unknown_traders.parquet +2 -2
- data/winning_df.parquet +2 -2
- notebooks/mech_calls_analysis.ipynb +109 -3
- scripts/daily_data.py +4 -0
- scripts/get_mech_info.py +3 -5
- scripts/markets.py +39 -4
- scripts/mech_request_utils.py +13 -7
- scripts/nr_mech_calls.py +2 -6
- scripts/pull_data.py +8 -3
- scripts/tools.py +9 -2
- scripts/utils.py +7 -1
- scripts/web3_utils.py +1 -1
data/all_trades_profitability.parquet
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:04134fbaceed13c1c02eeb952134df58ec787afddcba0e09b69b7ea5ae8693cd
+size 6590780
data/daily_info.parquet
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:880e896031e8e87ddc4e9fc952941d7bbe5196b86e56ebdf003e572182db6b76
+size 1050038
data/error_by_markets.parquet
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:5b0f69c6df66a53208bd47167d39db8ef7ec965a47a1a90fe429815cc44cbad9
+size 14168
data/invalid_trades.parquet
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:f29e5a59d7a2c60ef8408cddf58785f601a472105a99bd7339a45de8389fa36f
+size 159829
data/tools_accuracy.csv
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:a521cd7735fbd9aecd1fce18836b08dc03cc58dca1f52add0b60ec2bbb00f722
+size 1099
data/unknown_traders.parquet
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:5c237838416f8339b145719e5b370df1d9763099f9e5fa3e75d4d69053e5d311
+size 200722
data/winning_df.parquet
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:6c31991027458844d3c8e72da29bb9acd7bcef8ad31ae7c95a59ee168a34ba58
+size 14407
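All of the files under data/ appear to be tracked with Git LFS, so each of the diffs above touches only the three-line pointer file: the spec version, the sha256 oid of the new payload, and its size in bytes. The parquet/CSV contents themselves live in LFS storage and are not part of the git diff.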
notebooks/mech_calls_analysis.ipynb
CHANGED
@@ -2,26 +2,132 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
     "import pandas as pd\n",
     "import matplotlib.pyplot as plt\n",
     "import seaborn as sns\n",
-    "import
+    "from datetime import datetime, timezone\n",
     "sns.set_style(\"darkgrid\")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "new_trades = pd.read_parquet('../tmp/new_fpmmTrades.parquet')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def transform_to_datetime(x):\n",
+    "    return datetime.fromtimestamp(int(x), tz=timezone.utc)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "new_trades[\"creationTimestamp\"] = new_trades[\"creationTimestamp\"].apply(\n",
+    "    lambda x: transform_to_datetime(x)\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['collateralAmount', 'collateralAmountUSD', 'collateralToken',\n",
+       "       'creationTimestamp', 'trader_address', 'feeAmount', 'id',\n",
+       "       'oldOutcomeTokenMarginalPrice', 'outcomeIndex',\n",
+       "       'outcomeTokenMarginalPrice', 'outcomeTokensTraded', 'title',\n",
+       "       'transactionHash', 'type', 'market_creator',\n",
+       "       'fpmm.answerFinalizedTimestamp', 'fpmm.arbitrationOccurred',\n",
+       "       'fpmm.currentAnswer', 'fpmm.id', 'fpmm.isPendingArbitration',\n",
+       "       'fpmm.openingTimestamp', 'fpmm.outcomes', 'fpmm.title',\n",
+       "       'fpmm.condition.id'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "new_trades.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Timestamp('2025-01-07 09:01:05+0000', tz='UTC')"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "min(new_trades.creationTimestamp)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
    "metadata": {},
    "outputs": [],
    "source": [
     "all_trades = pd.read_parquet('../data/all_trades_profitability.parquet')"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Timestamp('2025-01-07 04:43:00+0000', tz='UTC')"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "max(all_trades.creation_timestamp)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 3,
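The two Timestamp outputs above are the point of the new cells: the earliest trade in the weekly pull (09:01 UTC on Tuesday 2025-01-07) is later than the newest trade already stored (04:43 UTC the same day), so the weekly file appears to be missing Monday's data, the gap that check_current_week_data in scripts/markets.py (below) is meant to close. A minimal sketch of the same comparison, assuming both frames have been loaded and timestamp-normalised as in the cells above:

    import pandas as pd

    new_trades = pd.read_parquet("../tmp/new_fpmmTrades.parquet")
    all_trades = pd.read_parquet("../data/all_trades_profitability.parquet")

    # After the transform_to_datetime step above, both columns are tz-aware.
    earliest_new = min(new_trades.creationTimestamp)
    latest_stored = max(all_trades.creation_timestamp)
    if earliest_new > latest_stored:
        print(f"possible gap between stored and new data: {earliest_new - latest_stored}")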
scripts/daily_data.py
CHANGED
@@ -10,6 +10,7 @@ from nr_mech_calls import (
     compute_daily_mech_calls,
     transform_to_datetime,
 )
+from markets import check_current_week_data
 
 logging.basicConfig(level=logging.INFO)
 
@@ -21,12 +22,15 @@ def prepare_live_metrics(
     fpmmTrades = pd.read_parquet(TMP_DIR / trades_filename)
     tools = pd.read_parquet(TMP_DIR / tools_filename)
 
+    # TODO if monday data of the week is missing in new_fpmmTrades then take it from the general file
     try:
         fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
             lambda x: transform_to_datetime(x)
         )
     except Exception as e:
         print(f"Transformation not needed")
+    # check missing data from Monday
+    fpmmTrades = check_current_week_data(fpmmTrades)
 
     print("Computing the estimated mech calls dataset")
     trader_mech_calls = compute_daily_mech_calls(fpmmTrades=fpmmTrades, tools=tools)
scripts/get_mech_info.py
CHANGED
@@ -7,11 +7,11 @@ from utils import (
     DATA_DIR,
     TMP_DIR,
     NETWORK_SUBGRAPH_URL,
+    transform_to_datetime,
 )
 import requests
 import pandas as pd
 import numpy as np
-from gnosis_timestamps import compute_request_time
 from mech_request_utils import (
     collect_all_mech_delivers,
     collect_all_mech_requests,
@@ -21,8 +21,6 @@ from mech_request_utils import (
     get_ipfs_data,
     merge_json_files,
 )
-from web3_utils import updating_timestamps
-from nr_mech_calls import transform_to_datetime
 
 SUBGRAPH_HEADERS = {
     "Accept": "application/json, multipart/mixed",
@@ -311,7 +309,7 @@ def get_mech_info_last_60_days() -> dict[str, Any]:
 
 
 @measure_execution_time
-def get_mech_events_since_last_run():
+def get_mech_events_since_last_run(logger):
     """Function to download only the new events since the last execution."""
 
     # Read the latest date from stored data
@@ -371,7 +369,7 @@ def get_mech_events_since_last_run():
     )
 
     # Add ipfs contents
-    get_ipfs_data("new_merged_requests.json", "new_tools_info.json")
+    get_ipfs_data("new_merged_requests.json", "new_tools_info.json", logger)
     return latest_timestamp
 
 
scripts/markets.py
CHANGED
@@ -19,13 +19,13 @@
 
 import functools
 import warnings
-from
+from datetime import datetime, timedelta
 from typing import Optional, Generator, Callable
 import pandas as pd
 import requests
 from tqdm import tqdm
 from typing import List, Dict
-from utils import SUBGRAPH_API_KEY, DATA_DIR, TMP_DIR
+from utils import SUBGRAPH_API_KEY, DATA_DIR, TMP_DIR, transform_to_datetime
 from web3_utils import (
     FPMM_QS_CREATOR,
     FPMM_PEARL_CREATOR,
@@ -299,14 +299,19 @@ def etl(filename: Optional[str] = None) -> pd.DataFrame:
     return fpmms
 
 
-def
-    # Check if fpmmTrades.parquet is in the same directory
+def read_global_trades_file() -> pd.DataFrame:
     try:
         trades_filename = "fpmmTrades.parquet"
         fpmms_trades = pd.read_parquet(TMP_DIR / trades_filename)
     except FileNotFoundError:
         print("Error: fpmmTrades.parquet not found. No market creator added")
         return
+    return fpmms_trades
+
+
+def add_market_creator(tools: pd.DataFrame) -> None:
+    # Check if fpmmTrades.parquet is in the same directory
+    fpmms_trades = read_global_trades_file()
     tools["market_creator"] = ""
     # traverse the list of traders
     tools_no_market_creator = 0
@@ -346,5 +351,35 @@ def fpmmTrades_etl(rpc: str, trades_filename: str, from_timestamp: str) -> None:
     return
 
 
+def check_current_week_data(trades_df: pd.DataFrame) -> pd.DataFrame:
+    """Function to check if all current weeks data is present, if not, then add the missing data from previous file"""
+    # Get current date
+    now = datetime.now()
+
+    # Get start of the current week (Monday)
+    start_of_week = now - timedelta(days=now.weekday())
+    start_of_week = start_of_week.replace(hour=0, minute=0, second=0, microsecond=0)
+    print(f"start of the week = {start_of_week}")
+
+    trades_df["creation_timestamp"] = pd.to_datetime(trades_df["creationTimestamp"])
+    trades_df["creation_date"] = trades_df["creation_timestamp"].dt.date
+    trades_df["creation_date"] = pd.to_datetime(trades_df["creation_date"])
+    # Check dataframe
+    min_date = min(trades_df.creation_date)
+    if min_date > start_of_week:
+        # missing data of current week in the trades file
+        fpmms_trades = read_global_trades_file()
+        # get missing data
+        missing_data = fpmms_trades[
+            (fpmms_trades["creation_date"] >= start_of_week)
+            & (fpmms_trades["creation_date"] < min_date)
+        ]
+        merge_df = pd.concat([trades_df, missing_data], ignore_index=True)
+        merge_df.drop_duplicates("id", keep="last", inplace=True)
+        return merge_df
+    # no update needed
+    return trades_df
+
+
 if __name__ == "__main__":
     etl("all_fpmms.parquet")
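check_current_week_data hinges on the week-start arithmetic: datetime.weekday() is 0 for Monday, so subtracting it as days always lands on the Monday of the current week. A self-contained sketch with a synthetic frame (the dates and ids here are made up for illustration):

    from datetime import datetime, timedelta

    import pandas as pd

    now = datetime(2025, 1, 8, 15, 30)  # a Wednesday, for illustration
    start_of_week = (now - timedelta(days=now.weekday())).replace(
        hour=0, minute=0, second=0, microsecond=0
    )
    print(start_of_week)  # 2025-01-06 00:00:00, the Monday of that week

    # A weekly file starting on Tuesday: min(creation_date) > start_of_week,
    # so the rows in [start_of_week, min_date) would be backfilled from the
    # global fpmmTrades file and deduplicated on "id".
    trades_df = pd.DataFrame(
        {"id": ["t1", "t2"], "creation_date": pd.to_datetime(["2025-01-07", "2025-01-08"])}
    )
    min_date = min(trades_df.creation_date)
    print(min_date > pd.Timestamp(start_of_week))  # True -> backfill branch runs

One assumption the function seems to make is that the frame returned by read_global_trades_file already carries the derived creation_date column, since the backfill filter uses it directly.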
scripts/mech_request_utils.py
CHANGED
@@ -40,14 +40,20 @@ from web3_utils import (
     SUBGRAPH_POLL_INTERVAL,
 )
 from concurrent.futures import ThreadPoolExecutor, as_completed
-from utils import
+from utils import (
+    DATA_DIR,
+    JSON_DATA_DIR,
+    MECH_SUBGRAPH_URL,
+    SUBGRAPH_API_KEY,
+    IPFS_ADDRESS,
+)
 
 NUM_WORKERS = 10
 BLOCKS_CHUNK_SIZE = 10000
 TEXT_ALIGNMENT = 30
 MINIMUM_WRITE_FILE_DELAY_SECONDS = 20
 MECH_FROM_BLOCK_RANGE = 50000
-
+
 last_write_time = 0.0
 
 REQUESTS_QUERY_FILTER = """
@@ -501,14 +507,14 @@ def merge_requests_delivers(
     return
 
 
-def get_ipfs_data(input_filename: str, output_filename: str):
+def get_ipfs_data(input_filename: str, output_filename: str, logger):
     with open(JSON_DATA_DIR / input_filename, "r") as file:
         mech_requests = json.load(file)
 
     total_keys_to_traverse = list(mech_requests.keys())
     updated_mech_requests = dict()
     session = create_session()
-
+    logger.info("UPDATING IPFS CONTENTS OF REQUESTS")
     # requests
     nr_errors = 0
     with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
@@ -533,11 +539,11 @@ def get_ipfs_data(input_filename: str, output_filename: str):
         updated_mech_requests.update(partial_dict)
 
     save_json_file(updated_mech_requests, output_filename)
-
+    logger.info(f"NUMBER OF MECH REQUEST IPFS ERRORS={nr_errors}")
 
     # delivers
     nr_deliver_errors = 0
-
+    logger.info("UPDATING IPFS CONTENTS OF DELIVERS")
     total_keys_to_traverse = list(updated_mech_requests.keys())
     final_tools_content = {}
     with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
@@ -562,7 +568,7 @@ def get_ipfs_data(input_filename: str, output_filename: str):
         final_tools_content.update(partial_dict)
 
     save_json_file(final_tools_content, output_filename)
-
+    logger.info(f"NUMBER OF MECH DELIVERS IPFS ERRORS={nr_deliver_errors}")
 
 
 def only_delivers_loop():
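get_ipfs_data now reports through the caller's logger instead of silently swallowing the error counts. Only logger.info is used, so any standard logging.Logger satisfies the new parameter; a minimal sketch of the wiring, mirroring what pull_data.py sets up in this same commit:

    import logging

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )
    logger = logging.getLogger("mech_ipfs")

    # from mech_request_utils import get_ipfs_data
    # get_ipfs_data("new_merged_requests.json", "new_tools_info.json", logger)
    nr_errors = 0  # placeholder for the count accumulated in get_ipfs_data
    logger.info(f"NUMBER OF MECH REQUEST IPFS ERRORS={nr_errors}")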
scripts/nr_mech_calls.py
CHANGED
@@ -1,17 +1,13 @@
 import pandas as pd
-from utils import DATA_DIR, DEFAULT_MECH_FEE, TMP_DIR
+from utils import DATA_DIR, DEFAULT_MECH_FEE, TMP_DIR, transform_to_datetime
 from tqdm import tqdm
-
+
 from typing import Dict, Any
 from collections import defaultdict
 from tools import IRRELEVANT_TOOLS
 import re
 
 
-def transform_to_datetime(x):
-    return datetime.fromtimestamp(int(x), tz=timezone.utc)
-
-
 def update_roi(row: pd.DataFrame) -> float:
     new_value = row.net_earnings / (
         row.collateral_amount
scripts/pull_data.py
CHANGED
@@ -26,7 +26,12 @@ from cloud_storage import load_historical_file
 from tools_metrics import compute_tools_based_datasets
 
 
-logging.basicConfig(
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+)
+logger = logging.getLogger(__name__)
 
 
 def add_current_answer(tools_filename: str):
@@ -88,7 +93,7 @@ def only_new_weekly_analysis():
     # Mech events ETL
     logging.info("Generating the mech json files")
     # get only new data
-    latest_timestamp = get_mech_events_since_last_run()
+    latest_timestamp = get_mech_events_since_last_run(logger)
     if latest_timestamp == None:
         print("Error while getting the mech events")
         return
@@ -127,7 +132,7 @@ def only_new_weekly_analysis():
 
     save_historical_data()
     try:
-        clean_old_data_from_parquet_files("2024-11-
+        clean_old_data_from_parquet_files("2024-11-12")
     except Exception as e:
         print("Error cleaning the oldest information from parquet files")
         print(f"reason = {e}")
scripts/tools.py
CHANGED
@@ -27,6 +27,7 @@ from typing import (
 )
 import pandas as pd
 import requests
+from datetime import datetime
 from gnosis_timestamps import transform_timestamp_to_datetime
 from requests.adapters import HTTPAdapter
 from tqdm import tqdm
@@ -96,11 +97,17 @@ NUM_WORKERS = 10
 GET_CONTENTS_BATCH_SIZE = 1000
 
 
+class TimestampedRetry(Retry):
+    def increment(self, *args, **kwargs):
+        print(f"Retry attempt at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+        return super().increment(*args, **kwargs)
+
+
 def create_session() -> requests.Session:
     """Create a session with a retry strategy."""
     session = requests.Session()
-    retry_strategy =
-        total=N_IPFS_RETRIES
+    retry_strategy = TimestampedRetry(
+        total=N_IPFS_RETRIES,
         backoff_factor=BACKOFF_FACTOR,
         status_forcelist=STATUS_FORCELIST,
     )
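TimestampedRetry only overrides Retry.increment, which urllib3 calls once per retry attempt, so every backoff gets a wall-clock line on stdout before delegating to the normal retry bookkeeping. A self-contained sketch of the pattern (the constants are inlined here as placeholders for the values tools.py and web3_utils.py define):

    from datetime import datetime

    import requests
    from requests.adapters import HTTPAdapter
    from urllib3.util.retry import Retry

    N_IPFS_RETRIES = 4  # mirrors web3_utils.py after this commit
    BACKOFF_FACTOR = 1  # placeholder value
    STATUS_FORCELIST = [429, 500, 502, 503, 504]  # placeholder value


    class TimestampedRetry(Retry):
        def increment(self, *args, **kwargs):
            # Log the wall-clock time of every retry before delegating
            print(f"Retry attempt at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
            return super().increment(*args, **kwargs)


    session = requests.Session()
    adapter = HTTPAdapter(
        max_retries=TimestampedRetry(
            total=N_IPFS_RETRIES,
            backoff_factor=BACKOFF_FACTOR,
            status_forcelist=STATUS_FORCELIST,
        )
    )
    session.mount("https://", adapter)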
scripts/utils.py
CHANGED
@@ -7,6 +7,7 @@ import pandas as pd
 import gc
 import re
 from dataclasses import dataclass
+from datetime import datetime, timezone
 from pathlib import Path
 from enum import Enum
 from string import Template
@@ -29,11 +30,12 @@ REQUEST_SENDER = "sender"
 PROMPT_FIELD = "prompt"
 HTTP = "http://"
 HTTPS = HTTP[:4] + "s" + HTTP[4:]
-IPFS_ADDRESS = f"{HTTPS}gateway.autonolas.tech/ipfs/"
 FORMAT_UPDATE_BLOCK_NUMBER = 30411638
 INVALID_ANSWER_HEX = (
     "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
 )
+OLD_IPFS_ADDRESS = "https://gateway.autonolas.tech/ipfs/"
+IPFS_ADDRESS = "https://gateway.gcp.autonolas.tech/ipfs/"
 
 INC_TOOLS = [
     "prediction-online",
@@ -241,6 +243,10 @@ EVENT_TO_MECH_STRUCT = {
 }
 
 
+def transform_to_datetime(x):
+    return datetime.fromtimestamp(int(x), tz=timezone.utc)
+
+
 def measure_execution_time(func):
     def wrapper(*args, **kwargs):
         start_time = time.time()
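transform_to_datetime is now defined once in utils.py and imported everywhere else (get_mech_info.py, markets.py, nr_mech_calls.py); this also appears to fix the old copy in nr_mech_calls.py, which called datetime without importing it. A worked example, using the timestamp behind the notebook's Timestamp('2025-01-07 09:01:05+0000') output:

    from datetime import datetime, timezone


    def transform_to_datetime(x):
        # Accepts the string or integer Unix timestamps found in the
        # subgraph data and returns a timezone-aware UTC datetime.
        return datetime.fromtimestamp(int(x), tz=timezone.utc)


    print(transform_to_datetime("1736240465"))
    # 2025-01-07 09:01:05+00:00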
scripts/web3_utils.py
CHANGED
@@ -33,7 +33,7 @@ LATEST_BLOCK: Optional[int] = None
 LATEST_BLOCK_NAME: BlockParams = "latest"
 BLOCK_DATA_NUMBER = "number"
 BLOCKS_CHUNK_SIZE = 10_000
-N_IPFS_RETRIES =
+N_IPFS_RETRIES = 4
 N_RPC_RETRIES = 100
 RPC_POLL_INTERVAL = 0.05
 SUBGRAPH_POLL_INTERVAL = 0.05