LysandreJik committed on
Commit
fe8da28
1 Parent(s): bd334dc

Cumulated only for pip

Browse files
Files changed (2) hide show
  1. app.py +78 -33
  2. index.js +6 -1
app.py CHANGED
@@ -6,7 +6,7 @@ from http.server import SimpleHTTPRequestHandler, ThreadingHTTPServer
6
  from urllib.parse import parse_qs, urlparse
7
 
8
  from huggingface_hub import list_datasets, set_access_token, HfFolder
9
- from datasets import load_dataset, DatasetDict
10
  import numpy as np
11
 
12
  HF_TOKEN = os.environ['HF_TOKEN']
@@ -20,6 +20,30 @@ datasets = {
20
  "pip": load_dataset("open-source-metrics/pip").sort('day')
21
  }
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  # datasets = {
24
  # k1: DatasetDict({
25
  # k2: v2.select(range(0, len(v2), max(1, int(len(v2) / 1000)))) for k2, v2 in v1.items()
@@ -27,6 +51,18 @@ datasets = {
27
  # }
28
 
29
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  def running_mean(x, N, total_length=-1):
31
  cumsum = np.cumsum(np.insert(x, 0, 0))
32
  to_pad = max(total_length - len(cumsum), 0)
@@ -43,7 +79,6 @@ class RequestHandler(SimpleHTTPRequestHandler):
43
 
44
  if self.path.startswith("/initialize"):
45
  dataset_keys = {k: set(v.keys()) for k, v in datasets.items()}
46
- dataset_keys['issues'].remove('transformers')
47
  dataset_with_most_splits = max([d for d in dataset_keys.values()], key=len)
48
  warnings = []
49
 
@@ -68,18 +103,34 @@ class RequestHandler(SimpleHTTPRequestHandler):
68
  library_names = query.get("input", None)[0]
69
  library_names = library_names.split(',')
70
 
71
- returned_values = {}
72
- for library_name in library_names:
73
- for i in datasets['pip'][library_name]:
74
- if i['day'] in returned_values:
75
- returned_values[i['day']][library_name] = i['num_downloads']
76
- else:
77
- returned_values[i['day']] = {library_name: i['num_downloads']}
78
-
79
- for library_name in library_names:
80
- for i in returned_values.keys():
81
- if library_name not in returned_values[i]:
82
- returned_values[i][library_name] = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
  returned_values = collections.OrderedDict(sorted(returned_values.items()))
85
  output = {l: [k[l] for k in returned_values.values()] for l in library_names}
@@ -105,23 +156,20 @@ class RequestHandler(SimpleHTTPRequestHandler):
105
  for library_name in library_names:
106
  dataset = dataset_dict[library_name]
107
 
108
- n = 0
109
  for i in dataset:
110
- n += 1
111
  if i['dates'] in returned_values:
112
- returned_values[i['dates']][library_name] = n
113
  else:
114
- returned_values[i['dates']] = {library_name: n}
115
-
116
- for library_name in library_names:
117
- for i in returned_values.keys():
118
- if library_name not in returned_values[i]:
119
- returned_values[i][library_name] = None
120
 
121
  returned_values = collections.OrderedDict(sorted(returned_values.items()))
 
122
  output = {l: [k[l] for k in returned_values.values()][::-1] for l in library_names}
123
  output['day'] = list(returned_values.keys())[::-1]
124
 
 
 
 
125
  self.send_response(200)
126
  self.send_header("Content-Type", "application/json")
127
  self.end_headers()
@@ -142,23 +190,20 @@ class RequestHandler(SimpleHTTPRequestHandler):
142
  for library_name in library_names:
143
  dataset = dataset_dict[library_name]
144
 
145
- n = 0
146
- for k, i in enumerate(dataset):
147
- n += 1
148
  if i['dates'] in returned_values:
149
- returned_values[i['dates']][library_name] = n
150
  else:
151
- returned_values[i['dates']] = {library_name: n}
152
-
153
- for library_name in library_names:
154
- for i in returned_values.keys():
155
- if library_name not in returned_values[i]:
156
- returned_values[i][library_name] = None
157
 
158
  returned_values = collections.OrderedDict(sorted(returned_values.items()))
 
159
  output = {l: [k[l] for k in returned_values.values()][::-1] for l in library_names}
160
  output['day'] = list(returned_values.keys())[::-1]
161
 
 
 
 
162
  self.send_response(200)
163
  self.send_header("Content-Type", "application/json")
164
  self.end_headers()
 
6
  from urllib.parse import parse_qs, urlparse
7
 
8
  from huggingface_hub import list_datasets, set_access_token, HfFolder
9
+ from datasets import load_dataset, DatasetDict, Dataset
10
  import numpy as np
11
 
12
  HF_TOKEN = os.environ['HF_TOKEN']
 
20
  "pip": load_dataset("open-source-metrics/pip").sort('day')
21
  }
22
 
23
# Module-level row counter consumed by `_range`. The code that maps `_range`
# over a split sets this back to 0 afterwards so the next split starts fresh.
val = 0


def _range(e):
    """Stamp ``e`` with the current counter value and advance the counter.

    Stores the module-level ``val`` under the 'range' key of ``e`` (mutating
    it in place), bumps ``val`` by one, and returns the same ``e`` object.
    """
    global val
    e['range'] = val
    val = val + 1
    return e
31
+
32
+
33
# Add a 'range' column to every split of the 'stars' and 'issues' datasets by
# mapping `_range` over each split, then swap the re-indexed splits back into
# `datasets`. The counter is reset to 0 after each split so numbering restarts.
# NOTE(review): presumably `map` calls `_range` once per row, in order, which
# makes 'range' a 0-based row position -- confirm against the datasets version.
stars = {}
issues = {}
for _key, _indexed in (('stars', stars), ('issues', issues)):
    for k, v in datasets[_key].items():
        _indexed[k] = v.map(_range)
        val = 0
    datasets[_key] = DatasetDict(**_indexed)
45
+
46
+
47
  # datasets = {
48
  # k1: DatasetDict({
49
  # k2: v2.select(range(0, len(v2), max(1, int(len(v2) / 1000)))) for k2, v2 in v1.items()
 
51
  # }
52
 
53
 
54
def link_values(library_names, returned_values):
    """Forward-fill missing per-library values across ``returned_values``.

    ``returned_values`` maps a key to a dict of ``{library_name: value}``.
    Entries are visited in the mapping's iteration order; whenever an entry
    lacks a library, it receives the most recent value seen for that library
    in an earlier entry, or ``None`` if there has been none yet.

    The inner dicts are modified in place and the same ``returned_values``
    object is returned.
    """
    for name in library_names:
        last_seen = None
        for row in returned_values.values():
            if name in row:
                # Remember the latest real value to carry forward.
                last_seen = row[name]
            else:
                row[name] = last_seen

    return returned_values
64
+
65
+
66
  def running_mean(x, N, total_length=-1):
67
  cumsum = np.cumsum(np.insert(x, 0, 0))
68
  to_pad = max(total_length - len(cumsum), 0)
 
79
 
80
  if self.path.startswith("/initialize"):
81
  dataset_keys = {k: set(v.keys()) for k, v in datasets.items()}
 
82
  dataset_with_most_splits = max([d for d in dataset_keys.values()], key=len)
83
  warnings = []
84
 
 
103
  library_names = query.get("input", None)[0]
104
  library_names = library_names.split(',')
105
 
106
+ if 'Cumulated' in library_names:
107
+ dataset_keys = {k: set(v.keys()) for k, v in datasets.items()}
108
+ dataset_with_most_splits = max([d for d in dataset_keys.values()], key=len)
109
+ library_names = list(dataset_with_most_splits)
110
+
111
+ returned_values = {}
112
+ for library_name in library_names:
113
+ for i in datasets['pip'][library_name]:
114
+ if i['day'] in returned_values:
115
+ returned_values[i['day']]['Cumulated'] += i['num_downloads']
116
+ else:
117
+ returned_values[i['day']] = {'Cumulated': i['num_downloads']}
118
+
119
+ library_names = ['Cumulated']
120
+
121
+ else:
122
+ returned_values = {}
123
+ for library_name in library_names:
124
+ for i in datasets['pip'][library_name]:
125
+ if i['day'] in returned_values:
126
+ returned_values[i['day']][library_name] = i['num_downloads']
127
+ else:
128
+ returned_values[i['day']] = {library_name: i['num_downloads']}
129
+
130
+ for library_name in library_names:
131
+ for i in returned_values.keys():
132
+ if library_name not in returned_values[i]:
133
+ returned_values[i][library_name] = None
134
 
135
  returned_values = collections.OrderedDict(sorted(returned_values.items()))
136
  output = {l: [k[l] for k in returned_values.values()] for l in library_names}
 
156
  for library_name in library_names:
157
  dataset = dataset_dict[library_name]
158
 
 
159
  for i in dataset:
 
160
  if i['dates'] in returned_values:
161
+ returned_values[i['dates']][library_name] = i['range']
162
  else:
163
+ returned_values[i['dates']] = {library_name: i['range']}
 
 
 
 
 
164
 
165
  returned_values = collections.OrderedDict(sorted(returned_values.items()))
166
+ returned_values = link_values(library_names, returned_values)
167
  output = {l: [k[l] for k in returned_values.values()][::-1] for l in library_names}
168
  output['day'] = list(returned_values.keys())[::-1]
169
 
170
# Trim down to a smaller number of points by keeping every `step`-th entry,
# where step = len(value) // 100. The step is clamped to at least 1: for a
# series shorter than 100 points the unclamped step is 0, and the previous
# `i % int(len(value) / 100)` raised ZeroDivisionError.
output = {k: value[::max(1, len(value) // 100)] for k, value in output.items()}
172
+
173
  self.send_response(200)
174
  self.send_header("Content-Type", "application/json")
175
  self.end_headers()
 
190
  for library_name in library_names:
191
  dataset = dataset_dict[library_name]
192
 
193
+ for i in dataset:
 
 
194
  if i['dates'] in returned_values:
195
+ returned_values[i['dates']][library_name] = i['range']
196
  else:
197
+ returned_values[i['dates']] = {library_name: i['range']}
 
 
 
 
 
198
 
199
  returned_values = collections.OrderedDict(sorted(returned_values.items()))
200
+ returned_values = link_values(library_names, returned_values)
201
  output = {l: [k[l] for k in returned_values.values()][::-1] for l in library_names}
202
  output['day'] = list(returned_values.keys())[::-1]
203
 
204
# Trim down to a smaller number of points by keeping every `step`-th entry,
# where step = len(value) // 100. The step is clamped to at least 1: for a
# series shorter than 100 points the unclamped step is 0, and the previous
# `i % int(len(value) / 100)` raised ZeroDivisionError.
output = {k: value[::max(1, len(value) // 100)] for k, value in output.items()}
206
+
207
  self.send_response(200)
208
  self.send_header("Content-Type", "application/json")
209
  self.end_headers()
index.js CHANGED
@@ -41,6 +41,7 @@ const createButton = (title, libraries, methods) => {
41
  const initialize = async () => {
42
  const inferResponse = await fetch(`initialize`);
43
  const inferJson = await inferResponse.json();
 
44
  // const graphsDiv = document.getElementsByClassName('graphs')[0];
45
  const librarySelector = document.getElementById('library-selector');
46
  const graphSelector = document.getElementById('graph-selector');
@@ -62,7 +63,11 @@ const initialize = async () => {
62
 
63
  const checkBoxLabel = document.createElement('label');
64
  const labelSpan = document.createElement('span')
65
- labelSpan.textContent = element.charAt(0).toUpperCase() + element.slice(1)
 
 
 
 
66
  checkBoxLabel.appendChild(checkBox)
67
  checkBoxLabel.appendChild(labelSpan)
68
 
 
41
  const initialize = async () => {
42
  const inferResponse = await fetch(`initialize`);
43
  const inferJson = await inferResponse.json();
44
+ inferJson.push('Cumulated')
45
  // const graphsDiv = document.getElementsByClassName('graphs')[0];
46
  const librarySelector = document.getElementById('library-selector');
47
  const graphSelector = document.getElementById('graph-selector');
 
63
 
64
  const checkBoxLabel = document.createElement('label');
65
  const labelSpan = document.createElement('span')
66
+
67
+ if (element == 'Cumulated')
68
+ labelSpan.textContent = "Cumulated - Only works for pip installs, will crash otherwise."
69
+ else
70
+ labelSpan.textContent = element.charAt(0).toUpperCase() + element.slice(1)
71
  checkBoxLabel.appendChild(checkBox)
72
  checkBoxLabel.appendChild(labelSpan)
73