lysandre HF staff committed on
Commit
1bddee8
1 Parent(s): 2dd5810

External libraries

Browse files
Files changed (4) hide show
  1. app.py +85 -15
  2. index.html +1 -0
  3. index.js +42 -3
  4. style.css +9 -0
app.py CHANGED
@@ -17,7 +17,13 @@ HfFolder.save_token(HF_TOKEN)
17
  datasets = {
18
  "stars": load_dataset("open-source-metrics/stars").sort('dates'),
19
  "issues": load_dataset("open-source-metrics/issues").sort('dates'),
20
- "pip": load_dataset("open-source-metrics/pip").sort('day')
 
 
 
 
 
 
21
  }
22
 
23
  val = 0
@@ -45,11 +51,18 @@ def _ignore_org_members(e):
45
 
46
  return e
47
 
 
48
  stars = {}
49
  for k, v in datasets['stars'].items():
50
  stars[k] = v.map(_range)
51
  val = 0
52
 
 
 
 
 
 
 
53
  issues = {}
54
  for k, v in datasets['issues'].items():
55
  issues[k] = v.map(_range)
@@ -57,15 +70,18 @@ for k, v in datasets['issues'].items():
57
  issues[k] = issues[k].map(_ignore_org_members)
58
  val = 0
59
 
60
- datasets['stars'] = DatasetDict(**stars)
61
- datasets['issues'] = DatasetDict(**issues)
62
 
 
 
 
 
 
 
63
 
64
- # datasets = {
65
- # k1: DatasetDict({
66
- # k2: v2.select(range(0, len(v2), max(1, int(len(v2) / 1000)))) for k2, v2 in v1.items()
67
- # }) for k1, v1 in datasets.items()
68
- # }
69
 
70
 
71
  def link_values(library_names, returned_values):
@@ -108,25 +124,54 @@ class RequestHandler(SimpleHTTPRequestHandler):
108
  if self.path.startswith("/initialize"):
109
  dataset_keys = {k: set(v.keys()) for k, v in datasets.items()}
110
  dataset_with_most_splits = max([d for d in dataset_keys.values()], key=len)
 
 
 
 
111
  warnings = []
112
 
113
  for k, v in dataset_keys.items():
114
  if len(v) < len(dataset_with_most_splits):
115
- warnings.extend(f"The {k} dataset does not contain all splits. Missing: {dataset_with_most_splits - v}")
 
 
 
 
 
 
 
 
 
 
116
 
117
- # TODO: Send and display warnings
118
  dataset_with_most_splits = list(dataset_with_most_splits)
119
  dataset_with_most_splits.sort()
120
 
121
- return self.response(list(dataset_with_most_splits))
 
 
 
 
 
 
 
122
 
123
  if self.path.startswith("/retrievePipInstalls"):
 
124
  library_names, options = parse_name_and_options(self.path)
125
 
126
  if '1' in options:
127
  returned_values = {}
128
  for library_name in library_names:
129
- for i in datasets['pip'][library_name]:
 
 
 
 
 
 
 
 
130
  if i['day'] in returned_values:
131
  returned_values[i['day']]['Cumulated'] += i['num_downloads']
132
  else:
@@ -137,7 +182,16 @@ class RequestHandler(SimpleHTTPRequestHandler):
137
  else:
138
  returned_values = {}
139
  for library_name in library_names:
140
- for i in datasets['pip'][library_name]:
 
 
 
 
 
 
 
 
 
141
  if i['day'] in returned_values:
142
  returned_values[i['day']][library_name] = i['num_downloads']
143
  else:
@@ -155,13 +209,21 @@ class RequestHandler(SimpleHTTPRequestHandler):
155
  return self.response(output)
156
 
157
  if self.path.startswith("/retrieveStars"):
 
158
  library_names, options = parse_name_and_options(self.path)
159
  returned_values = {}
160
  dataset_dict = datasets['stars']
 
161
  week_over_week = '1' in options
162
 
163
  for library_name in library_names:
164
- dataset = dataset_dict[library_name]
 
 
 
 
 
 
165
 
166
  last_value = 0
167
  last_week = dataset[0]['week']
@@ -188,6 +250,7 @@ class RequestHandler(SimpleHTTPRequestHandler):
188
 
189
 
190
  if self.path.startswith("/retrieveIssues"):
 
191
  library_names, options = parse_name_and_options(self.path)
192
 
193
  exclude_org_members = '1' in options
@@ -195,10 +258,17 @@ class RequestHandler(SimpleHTTPRequestHandler):
195
 
196
  returned_values = {}
197
  dataset_dict = datasets['issues']
 
198
  range_id = 'range' if not exclude_org_members else 'range_non_org'
199
 
200
  for library_name in library_names:
201
- dataset = dataset_dict[library_name]
 
 
 
 
 
 
202
 
203
  last_value = 0
204
  last_week = dataset[0]['week']
17
  datasets = {
18
  "stars": load_dataset("open-source-metrics/stars").sort('dates'),
19
  "issues": load_dataset("open-source-metrics/issues").sort('dates'),
20
+ "pip": load_dataset("open-source-metrics/pip").sort('day'),
21
+ }
22
+
23
+ external_datasets = {
24
+ "stars": load_dataset("open-source-metrics/stars-external").sort('dates'),
25
+ "issues": load_dataset("open-source-metrics/issues-external").sort('dates'),
26
+ "pip": load_dataset("open-source-metrics/pip-external").sort('day')
27
  }
28
 
29
  val = 0
51
 
52
  return e
53
 
54
+
55
  stars = {}
56
  for k, v in datasets['stars'].items():
57
  stars[k] = v.map(_range)
58
  val = 0
59
 
60
+ stars_external = {}
61
+ for k, v in external_datasets['stars'].items():
62
+ stars_external[k] = v.map(_range)
63
+ val = 0
64
+
65
+
66
  issues = {}
67
  for k, v in datasets['issues'].items():
68
  issues[k] = v.map(_range)
70
  issues[k] = issues[k].map(_ignore_org_members)
71
  val = 0
72
 
 
 
73
 
74
+ issues_external = {}
75
+ for k, v in external_datasets['issues'].items():
76
+ issues_external[k] = v.map(_range)
77
+ val = 0
78
+ issues_external[k] = issues_external[k].map(_ignore_org_members)
79
+ val = 0
80
 
81
+ datasets['stars'] = DatasetDict(**stars)
82
+ datasets['issues'] = DatasetDict(**issues)
83
+ external_datasets['stars'] = DatasetDict(**stars_external)
84
+ external_datasets['issues'] = DatasetDict(**issues_external)
 
85
 
86
 
87
  def link_values(library_names, returned_values):
124
  if self.path.startswith("/initialize"):
125
  dataset_keys = {k: set(v.keys()) for k, v in datasets.items()}
126
  dataset_with_most_splits = max([d for d in dataset_keys.values()], key=len)
127
+
128
+ external_dataset_keys = {k: set(v.keys()) for k, v in external_datasets.items()}
129
+ external_dataset_with_most_splits = max([d for d in external_dataset_keys.values()], key=len)
130
+
131
  warnings = []
132
 
133
  for k, v in dataset_keys.items():
134
  if len(v) < len(dataset_with_most_splits):
135
+ warnings.append(
136
+ f"The {k} dataset does not contain all splits. Missing: {dataset_with_most_splits - v}."
137
+ f"\nSelecting that split to show the pip install numbers will not work."
138
+ )
139
+
140
+ for k, v in external_dataset_keys.items():
141
+ if len(v) < len(external_dataset_with_most_splits):
142
+ warnings.append(
143
+ f"The {k} dataset does not contain all splits. Missing: {external_dataset_with_most_splits - v}"
144
+ f".\nSelecting that split to show the pip install numbers will not work."
145
+ )
146
 
 
147
  dataset_with_most_splits = list(dataset_with_most_splits)
148
  dataset_with_most_splits.sort()
149
 
150
+ external_dataset_with_most_splits = list(external_dataset_with_most_splits)
151
+ external_dataset_with_most_splits.sort()
152
+
153
+ return self.response({
154
+ 'internal': list(dataset_with_most_splits),
155
+ 'external': external_dataset_with_most_splits,
156
+ 'warnings': warnings
157
+ })
158
 
159
  if self.path.startswith("/retrievePipInstalls"):
160
+ errors = []
161
  library_names, options = parse_name_and_options(self.path)
162
 
163
  if '1' in options:
164
  returned_values = {}
165
  for library_name in library_names:
166
+ ds = None
167
+ if library_name in datasets['pip']:
168
+ ds = datasets['pip'][library_name]
169
+ elif library_name in external_datasets['pip']:
170
+ ds = external_datasets['pip'][library_name]
171
+ else:
172
+ errors.append(f"No {library_name} found in internal or external datasets.")
173
+
174
+ for i in ds:
175
  if i['day'] in returned_values:
176
  returned_values[i['day']]['Cumulated'] += i['num_downloads']
177
  else:
182
  else:
183
  returned_values = {}
184
  for library_name in library_names:
185
+
186
+ if library_name in datasets['pip']:
187
+ ds = datasets['pip'][library_name]
188
+ elif library_name in external_datasets['pip']:
189
+ ds = external_datasets['pip'][library_name]
190
+ else:
191
+ errors.append(f"No {library_name} found in internal or external datasets for pip.")
192
+ return {'errors': errors}
193
+
194
+ for i in ds:
195
  if i['day'] in returned_values:
196
  returned_values[i['day']][library_name] = i['num_downloads']
197
  else:
209
  return self.response(output)
210
 
211
  if self.path.startswith("/retrieveStars"):
212
+ errors = []
213
  library_names, options = parse_name_and_options(self.path)
214
  returned_values = {}
215
  dataset_dict = datasets['stars']
216
+ external_dataset_dict = external_datasets['stars']
217
  week_over_week = '1' in options
218
 
219
  for library_name in library_names:
220
+ if library_name in dataset_dict:
221
+ dataset = dataset_dict[library_name]
222
+ elif library_name in external_dataset_dict:
223
+ dataset = external_dataset_dict[library_name]
224
+ else:
225
+ errors.append(f"No {library_name} found in internal or external datasets for stars.")
226
+ return {'errors': errors}
227
 
228
  last_value = 0
229
  last_week = dataset[0]['week']
250
 
251
 
252
  if self.path.startswith("/retrieveIssues"):
253
+ errors = []
254
  library_names, options = parse_name_and_options(self.path)
255
 
256
  exclude_org_members = '1' in options
258
 
259
  returned_values = {}
260
  dataset_dict = datasets['issues']
261
+ external_dataset_dict = external_datasets['issues']
262
  range_id = 'range' if not exclude_org_members else 'range_non_org'
263
 
264
  for library_name in library_names:
265
+ if library_name in dataset_dict:
266
+ dataset = dataset_dict[library_name]
267
+ elif library_name in external_dataset_dict:
268
+ dataset = external_dataset_dict[library_name]
269
+ else:
270
+ errors.append(f"No {library_name} found in internal or external datasets for stars.")
271
+ return {'errors': errors}
272
 
273
  last_value = 0
274
  last_week = dataset[0]['week']
index.html CHANGED
@@ -10,6 +10,7 @@
10
  </head>
11
 
12
  <body>
 
13
  <div id="loading"></div>
14
  <div class="graphs">
15
  <div id="library-selector"></div>
10
  </head>
11
 
12
  <body>
13
+ <div id="warnings"></div>
14
  <div id="loading"></div>
15
  <div class="graphs">
16
  <div id="library-selector"></div>
index.js CHANGED
@@ -82,7 +82,9 @@ const createButton = (title, libraries, methods) => {
82
  document.getElementById('issue-graph').innerHTML = ''
83
  const e = load()
84
  document.body.appendChild(e)
85
- const selectedLibraries = libraries.filter(e => document.querySelector(`#${e}Checkbox`).checked);
 
 
86
 
87
  const relevantOptions = getCheckedOptions();
88
 
@@ -102,8 +104,11 @@ const createButton = (title, libraries, methods) => {
102
 
103
  const initialize = async () => {
104
  const inferResponse = await fetch(`initialize`);
 
105
  const inferJson = await inferResponse.json();
106
- // const graphsDiv = document.getElementsByClassName('graphs')[0];
 
 
107
  const librarySelector = document.getElementById('library-selector');
108
  const graphSelector = document.getElementById('graph-selector');
109
  const selectorSubmit = document.getElementById('selector-submit');
@@ -116,7 +121,41 @@ const initialize = async () => {
116
  graphSpan.textContent = "Select graphs to display"
117
  graphSelector.appendChild(graphSpan);
118
 
119
- for (const element of inferJson) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  const div = document.createElement('div');
121
  const checkBox = document.createElement('input');
122
  checkBox.type = 'checkbox'
82
  document.getElementById('issue-graph').innerHTML = ''
83
  const e = load()
84
  document.body.appendChild(e)
85
+ const selectedInternalLibraries = libraries.internal.filter(e => document.querySelector(`#${e}Checkbox`).checked);
86
+ const selectedExternalLibraries = libraries.external.filter(e => document.querySelector(`#${e}Checkbox`).checked);
87
+ const selectedLibraries = selectedInternalLibraries.concat(selectedExternalLibraries);
88
 
89
  const relevantOptions = getCheckedOptions();
90
 
104
 
105
  const initialize = async () => {
106
  const inferResponse = await fetch(`initialize`);
107
+ console.log(inferResponse);
108
  const inferJson = await inferResponse.json();
109
+ console.log(inferJson);
110
+
111
+ const warnings = document.getElementById("warnings")
112
  const librarySelector = document.getElementById('library-selector');
113
  const graphSelector = document.getElementById('graph-selector');
114
  const selectorSubmit = document.getElementById('selector-submit');
121
  graphSpan.textContent = "Select graphs to display"
122
  graphSelector.appendChild(graphSpan);
123
 
124
+ if (inferJson.warnings.length > 0) {
125
+ const div = document.createElement('div');
126
+ div.classList.add('warning-div')
127
+
128
+ for (const warning of inferJson.warnings) {
129
+ const labelSpan = document.createElement('span');
130
+ labelSpan.textContent = `Warning: ${warning}`;
131
+
132
+ div.appendChild(labelSpan);
133
+ }
134
+ warnings.appendChild(div);
135
+ }
136
+
137
+ for (const element of inferJson.internal) {
138
+ const div = document.createElement('div');
139
+ const checkBox = document.createElement('input');
140
+ checkBox.type = 'checkbox'
141
+ checkBox.id = `${element}Checkbox`;
142
+
143
+ const checkBoxLabel = document.createElement('label');
144
+ const labelSpan = document.createElement('span')
145
+
146
+ labelSpan.textContent = element.charAt(0).toUpperCase() + element.slice(1)
147
+ checkBoxLabel.appendChild(checkBox)
148
+ checkBoxLabel.appendChild(labelSpan)
149
+
150
+ div.appendChild(checkBoxLabel)
151
+ librarySelector.appendChild(div)
152
+ }
153
+
154
+ const externalLibs = document.createElement("h3")
155
+ externalLibs.textContent = "External Libraries"
156
+ librarySelector.appendChild(externalLibs);
157
+
158
+ for (const element of inferJson.external) {
159
  const div = document.createElement('div');
160
  const checkBox = document.createElement('input');
161
  checkBox.type = 'checkbox'
style.css CHANGED
@@ -52,6 +52,15 @@ button:hover {
52
  margin-left: 20px;
53
  }
54
 
 
 
 
 
 
 
 
 
 
55
  .submit {
56
  margin-bottom: 50px;
57
  }
52
  margin-left: 20px;
53
  }
54
 
55
+ .warning-div {
56
+ background-color: rgb(255, 230, 164);
57
+ border-radius: 10px;
58
+ border-bottom: 3px solid rgb(235, 210, 144);
59
+ border-right: 3px solid rgb(235, 210, 144);
60
+ margin: 10px;
61
+ padding: 20px;
62
+ }
63
+
64
  .submit {
65
  margin-bottom: 50px;
66
  }