rosacastillo committed on
Commit
6e7e273
·
1 Parent(s): 00f2003

cleaning and refactoring code

Browse files
scripts/pull_data.py CHANGED
@@ -13,9 +13,7 @@ from markets import (
13
  DEFAULT_FILENAME as MARKETS_FILENAME,
14
  )
15
  from tools import (
16
- etl as tools_etl,
17
  DEFAULT_FILENAME as TOOLS_FILENAME,
18
- update_tools_accuracy,
19
  generate_tools_file,
20
  )
21
  from profitability import run_profitability_analysis
 
13
  DEFAULT_FILENAME as MARKETS_FILENAME,
14
  )
15
  from tools import (
 
16
  DEFAULT_FILENAME as TOOLS_FILENAME,
 
17
  generate_tools_file,
18
  )
19
  from profitability import run_profitability_analysis
scripts/roi_analysis.py CHANGED
@@ -13,7 +13,6 @@ from markets import (
13
  TOOLS_FILENAME = "tools_2024.parquet"
14
  from tools import (
15
  etl as tools_etl,
16
- update_tools_accuracy,
17
  )
18
  from pull_data import (
19
  DATA_DIR,
 
13
  TOOLS_FILENAME = "tools_2024.parquet"
14
  from tools import (
15
  etl as tools_etl,
 
16
  )
17
  from pull_data import (
18
  DATA_DIR,
scripts/tools.py CHANGED
@@ -613,77 +613,6 @@ def generate_tools_file():
613
  print(f"An Exception happened while parsing the json events {e}")
614
 
615
 
616
- def update_tools_accuracy(
617
- tools_acc: pd.DataFrame, tools_df: pd.DataFrame, inc_tools: List[str]
618
- ) -> pd.DataFrame:
619
- """To compute/update the latest accuracy information for the different mech tools"""
620
-
621
- # computation of the accuracy information
622
- tools_inc = tools_df[tools_df["tool"].isin(inc_tools)]
623
- # filtering errors
624
- tools_non_error = tools_inc[tools_inc["error"] != 1]
625
- tools_non_error.loc[:, "currentAnswer"] = tools_non_error["currentAnswer"].replace(
626
- {"no": "No", "yes": "Yes"}
627
- )
628
- tools_non_error = tools_non_error[
629
- tools_non_error["currentAnswer"].isin(["Yes", "No"])
630
- ]
631
- tools_non_error = tools_non_error[tools_non_error["vote"].isin(["Yes", "No"])]
632
- tools_non_error["win"] = (
633
- tools_non_error["currentAnswer"] == tools_non_error["vote"]
634
- ).astype(int)
635
- tools_non_error.columns = tools_non_error.columns.astype(str)
636
- print("Tools dataset after filtering")
637
- print(tools_non_error.head())
638
-
639
- wins = tools_non_error.groupby(["tool", "win"]).size().unstack().fillna(0)
640
- wins["tool_accuracy"] = (wins[1] / (wins[0] + wins[1])) * 100
641
- wins.reset_index(inplace=True)
642
- wins["total_requests"] = wins[0] + wins[1]
643
- wins.columns = wins.columns.astype(str)
644
- wins = wins[["tool", "tool_accuracy", "total_requests"]]
645
-
646
- print("Wins dataset")
647
- print(wins.head())
648
- no_timeline_info = False
649
- try:
650
- timeline = tools_non_error.groupby(["tool"])["request_time"].agg(["min", "max"])
651
- print("timeline dataset")
652
- print(timeline.head())
653
- acc_info = wins.merge(timeline, how="left", on="tool")
654
- except:
655
- print("NO REQUEST TIME INFORMATION AVAILABLE")
656
- no_timeline_info = True
657
- acc_info = wins
658
-
659
- if tools_acc is None:
660
- print("Creating accuracy file for the first time")
661
- return acc_info
662
-
663
- # update the old information
664
- print("Updating accuracy information")
665
- tools_to_update = list(acc_info["tool"].values)
666
- print("tools to update")
667
- print(tools_to_update)
668
- existing_tools = list(tools_acc["tool"].values)
669
- for tool in tools_to_update:
670
- if tool in existing_tools:
671
- new_accuracy = acc_info[acc_info["tool"] == tool]["tool_accuracy"].values[0]
672
- new_volume = acc_info[acc_info["tool"] == tool]["total_requests"].values[0]
673
- if no_timeline_info:
674
- new_min_timeline = None
675
- new_max_timeline = None
676
- else:
677
- new_min_timeline = acc_info[acc_info["tool"] == tool]["min"].values[0]
678
- new_max_timeline = acc_info[acc_info["tool"] == tool]["max"].values[0]
679
- tools_acc.loc[tools_acc["tool"] == tool, "tool_accuracy"] = new_accuracy
680
- tools_acc.loc[tools_acc["tool"] == tool, "total_requests"] = new_volume
681
- tools_acc.loc[tools_acc["tool"] == tool, "min"] = new_min_timeline
682
- tools_acc.loc[tools_acc["tool"] == tool, "max"] = new_max_timeline
683
- print(tools_acc)
684
- return tools_acc
685
-
686
-
687
  if __name__ == "__main__":
688
  RPCs = [
689
  "https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a",
 
613
  print(f"An Exception happened while parsing the json events {e}")
614
 
615
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
616
  if __name__ == "__main__":
617
  RPCs = [
618
  "https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a",
scripts/update_tools_accuracy.py CHANGED
@@ -3,7 +3,7 @@ import pandas as pd
3
  import ipfshttpclient
4
  from pathlib import Path
5
  from utils import INC_TOOLS
6
- from tools import update_tools_accuracy
7
 
8
  ACCURACY_FILENAME = "tools_accuracy.csv"
9
  IPFS_SERVER = "/dns/registry.autonolas.tech/tcp/443/https"
@@ -12,6 +12,77 @@ ROOT_DIR = SCRIPTS_DIR.parent
12
  DATA_DIR = ROOT_DIR / "data"
13
 
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  def compute_tools_accuracy():
16
  print("Computing accuracy of tools")
17
  print("Reading tools parquet file")
 
3
  import ipfshttpclient
4
  from pathlib import Path
5
  from utils import INC_TOOLS
6
+ from typing import List
7
 
8
  ACCURACY_FILENAME = "tools_accuracy.csv"
9
  IPFS_SERVER = "/dns/registry.autonolas.tech/tcp/443/https"
 
12
  DATA_DIR = ROOT_DIR / "data"
13
 
14
 
15
def update_tools_accuracy(
    tools_acc: pd.DataFrame, tools_df: pd.DataFrame, inc_tools: List[str]
) -> pd.DataFrame:
    """Compute/update the latest accuracy information for the different mech tools.

    Rows of ``tools_df`` are kept only when their ``tool`` is in ``inc_tools``,
    their ``error`` is not 1, and both ``currentAnswer`` (after normalising
    "yes"/"no" to "Yes"/"No") and ``vote`` are "Yes" or "No". A row counts as
    a win when ``currentAnswer`` equals ``vote``.

    Args:
        tools_acc: Previously stored accuracy table, or ``None`` when no
            accuracy information exists yet. When given, it is updated
            IN PLACE and the same object is returned.
        tools_df: Tool requests; must have the columns ``tool``, ``error``,
            ``currentAnswer`` and ``vote``. ``request_time`` is optional.
        inc_tools: Names of the tools to include in the computation.

    Returns:
        If ``tools_acc`` is ``None``: a new DataFrame with the columns
        ``tool``, ``tool_accuracy`` (percentage of wins) and
        ``total_requests``, plus ``min``/``max`` of ``request_time`` when that
        information is available. Otherwise ``tools_acc`` itself, with those
        columns refreshed for every tool already present in it; tools that
        are not yet in ``tools_acc`` are left out.
    """

    # computation of the accuracy information
    tools_inc = tools_df[tools_df["tool"].isin(inc_tools)]
    # filtering errors; take an explicit copy so the column assignments below
    # never write through a slice of the caller's dataframe
    tools_non_error = tools_inc[tools_inc["error"] != 1].copy()
    tools_non_error["currentAnswer"] = tools_non_error["currentAnswer"].replace(
        {"no": "No", "yes": "Yes"}
    )
    valid_answers = ["Yes", "No"]
    tools_non_error = tools_non_error[
        tools_non_error["currentAnswer"].isin(valid_answers)
        & tools_non_error["vote"].isin(valid_answers)
    ].copy()
    tools_non_error["win"] = (
        tools_non_error["currentAnswer"] == tools_non_error["vote"]
    ).astype(int)
    tools_non_error.columns = tools_non_error.columns.astype(str)
    print("Tools dataset after filtering")
    print(tools_non_error.head())

    wins = tools_non_error.groupby(["tool", "win"]).size().unstack().fillna(0)
    # unstack() only creates columns for the outcomes actually observed, so
    # force both the loss (0) and win (1) columns to exist
    wins = wins.reindex(columns=[0, 1], fill_value=0)
    wins["tool_accuracy"] = (wins[1] / (wins[0] + wins[1])) * 100
    wins.reset_index(inplace=True)
    wins["total_requests"] = wins[0] + wins[1]
    wins.columns = wins.columns.astype(str)
    wins = wins[["tool", "tool_accuracy", "total_requests"]]

    print("Wins dataset")
    print(wins.head())
    no_timeline_info = False
    try:
        timeline = tools_non_error.groupby(["tool"])["request_time"].agg(["min", "max"])
        print("timeline dataset")
        print(timeline.head())
        acc_info = wins.merge(timeline, how="left", on="tool")
    except Exception:
        # best effort: the timeline is optional (e.g. no request_time column)
        print("NO REQUEST TIME INFORMATION AVAILABLE")
        no_timeline_info = True
        acc_info = wins

    if tools_acc is None:
        print("Creating accuracy file for the first time")
        return acc_info

    # update the old information
    print("Updating accuracy information")
    tools_to_update = list(acc_info["tool"].values)
    print("tools to update")
    print(tools_to_update)
    existing_tools = set(tools_acc["tool"].values)
    for tool in tools_to_update:
        if tool not in existing_tools:
            continue
        # acc_info has exactly one row per tool (it comes from a groupby)
        new_row = acc_info[acc_info["tool"] == tool]
        new_accuracy = new_row["tool_accuracy"].values[0]
        new_volume = new_row["total_requests"].values[0]
        if no_timeline_info:
            new_min_timeline = None
            new_max_timeline = None
        else:
            new_min_timeline = new_row["min"].values[0]
            new_max_timeline = new_row["max"].values[0]
        target_rows = tools_acc["tool"] == tool
        tools_acc.loc[target_rows, "tool_accuracy"] = new_accuracy
        tools_acc.loc[target_rows, "total_requests"] = new_volume
        tools_acc.loc[target_rows, "min"] = new_min_timeline
        tools_acc.loc[target_rows, "max"] = new_max_timeline
    print(tools_acc)
    return tools_acc
84
+
85
+
86
  def compute_tools_accuracy():
87
  print("Computing accuracy of tools")
88
  print("Reading tools parquet file")