SaulLu committed on
Commit
4615d65
1 Parent(s): 57845b8

test with different data

Browse files
dashboard_utils/bubbles.py CHANGED
@@ -1,4 +1,5 @@
1
  import datetime
 
2
  from concurrent.futures import as_completed
3
  from urllib import parse
4
 
@@ -13,19 +14,25 @@ WANDB_REPO = "learning-at-home/Worker_logs"
13
 
14
  @simple_time_tracker(_log)
15
  def get_new_bubble_data():
16
- serialized_data_points, latest_timestamp = get_serialized_data_points()
 
17
  serialized_data = get_serialized_data(serialized_data_points, latest_timestamp)
18
- profiles = get_profiles(serialized_data_points)
 
 
 
 
 
19
 
20
  return serialized_data, profiles
21
 
22
 
23
  @simple_time_tracker(_log)
24
- def get_profiles(serialized_data_points):
25
  profiles = []
26
  with FuturesSession() as session:
27
  futures = []
28
- for username in serialized_data_points.keys():
29
  future = session.get(URL_QUICKSEARCH + parse.urlencode({"type": "user", "q": username}))
30
  future.username = username
31
  futures.append(future)
@@ -100,30 +107,36 @@ def get_serialized_data_points():
100
 
101
  @simple_time_tracker(_log)
102
  def get_serialized_data(serialized_data_points, latest_timestamp):
103
- serialized_data_points_v2 = []
104
- max_velocity = 1
105
- for run_name, serialized_data_point in serialized_data_points.items():
106
- activeRuns = []
107
- loss = 0
108
- runtime = 0
109
- batches = 0
110
- velocity = 0
111
- for run in serialized_data_point["Runs"]:
112
- if run["date"] == latest_timestamp:
113
- run["date"] = run["date"].isoformat()
114
- activeRuns.append(run)
115
- loss += run["loss"]
116
- velocity += run["velocity"]
117
- loss = loss / len(activeRuns) if activeRuns else 0
118
- runtime += run["runtime"]
119
- batches += run["batches"]
120
- new_item = {
121
- "date": latest_timestamp.isoformat(),
122
- "profileId": run_name,
123
- "batches": batches,
124
- "runtime": runtime,
125
- "activeRuns": activeRuns,
126
- }
127
- serialized_data_points_v2.append(new_item)
128
- serialized_data = {"points": [serialized_data_points_v2], "maxVelocity": max_velocity}
 
 
 
 
 
 
129
  return serialized_data
 
1
  import datetime
2
+ import json
3
  from concurrent.futures import as_completed
4
  from urllib import parse
5
 
 
14
 
15
@simple_time_tracker(_log)
def get_new_bubble_data():
    """Return the serialized bubble-chart data plus the matching user profiles.

    Returns:
        tuple: ``(serialized_data, profiles)`` where ``serialized_data`` is the
        dict loaded by :func:`get_serialized_data` (with a ``"points"`` list of
        frames) and ``profiles`` is whatever :func:`get_profiles` returns for
        the user names found in the first frame.

    NOTE(review): live fetching is temporarily disabled for testing with
    canned data — ``None`` placeholders are passed to
    :func:`get_serialized_data`, which currently ignores them and loads a
    local JSON fixture instead.
    """
    # TODO: restore the live data source once testing with canned data is done:
    # serialized_data_points, latest_timestamp = get_serialized_data_points()
    serialized_data_points, latest_timestamp = None, None
    serialized_data = get_serialized_data(serialized_data_points, latest_timestamp)

    # One entry per point in the first frame; "profileId" holds the user name.
    usernames = [item["profileId"] for item in serialized_data["points"][0]]

    profiles = get_profiles(usernames)

    return serialized_data, profiles
28
 
29
 
30
  @simple_time_tracker(_log)
31
+ def get_profiles(usernames):
32
  profiles = []
33
  with FuturesSession() as session:
34
  futures = []
35
+ for username in usernames:
36
  future = session.get(URL_QUICKSEARCH + parse.urlencode({"type": "user", "q": username}))
37
  future.username = username
38
  futures.append(future)
 
107
 
108
@simple_time_tracker(_log)
def get_serialized_data(serialized_data_points, latest_timestamp):
    """Load pre-serialized bubble-chart data from a local JSON fixture.

    The live aggregation over ``serialized_data_points`` is temporarily
    disabled for testing; both parameters are accepted only to keep the
    caller-facing signature unchanged and are currently ignored. The previous
    aggregation implementation can be recovered from git history.

    Args:
        serialized_data_points: ignored while the fixture is in use.
        latest_timestamp: ignored while the fixture is in use.

    Returns:
        dict: the fixture content, expected to contain at least a ``"points"``
        key (a list of frames) — TODO confirm against the fixture schema.
    """
    # NOTE(review): hard-coded absolute path — acceptable for a local
    # experiment, but should be made configurable before this ships.
    fixture_path = (
        "/mnt/storage/Documents/hugging_face/colaborative_hub_training/demo_neurips/training-transformers-together-dashboard/data/"
        "serializaledata_V2.json"
    )
    # Explicit UTF-8: JSON files are UTF-8 by spec, and relying on the
    # platform default encoding is fragile.
    with open(fixture_path, "r", encoding="utf-8") as f:
        serialized_data = json.load(f)
    return serialized_data
data/serializaledata.json ADDED
The diff for this file is too large to render. See raw diff
 
perso/change_data.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""One-off helper: build a smaller test fixture from the full dashboard data.

Reads ``serializaledata.json``, keeps only the last frame of ``"points"`` and
randomly drops ~80% of its items, then writes the result to
``serializaledata_V2.json``.
"""
import json
import random

# Single source of truth for the data directory (was duplicated in both opens).
DATA_DIR = "/mnt/storage/Documents/hugging_face/colaborative_hub_training/demo_neurips/training-transformers-together-dashboard/data/"

with open(DATA_DIR + "serializaledata.json", "r", encoding="utf-8") as f:
    serialized_data = json.load(f)

# NOTE(review): this is an alias, not a copy — mutating ``serialized_data_v2``
# also mutates ``serialized_data``. Harmless here because the original dict is
# not reused afterwards, but worth knowing before extending this script.
serialized_data_v2 = serialized_data
# Keep only the last frame, sub-sampled to ~20% of its points (not seeded, so
# each run produces a different sample).
serialized_data_v2["points"] = [
    [item for item in serialized_data["points"][-1] if random.random() > 0.8]
]

with open(DATA_DIR + "serializaledata_V2.json", "w", encoding="utf-8") as f:
    json.dump(serialized_data_v2, f)
perso/get_usernames.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""One-off helper: print the user names present in the reduced test fixture.

Loads ``serializaledata_V2.json`` and prints the ``"profileId"`` of every
point in the first (and, per ``change_data.py``, only) frame.
"""
import json

with open(
    "/mnt/storage/Documents/hugging_face/colaborative_hub_training/demo_neurips/training-transformers-together-dashboard/data/"
    "serializaledata_V2.json",
    "r",
    encoding="utf-8",
) as f:
    serialized_data = json.load(f)

# Each point carries the owning user's name/id in "profileId".
usernames = [item["profileId"] for item in serialized_data["points"][0]]

print(usernames)