lysandre HF staff committed on
Commit
d947152
1 Parent(s): a7e9f5d
Files changed (2) hide show
  1. app.py +36 -56
  2. index.js +3 -0
app.py CHANGED
@@ -17,12 +17,13 @@ HfFolder.save_token(HF_TOKEN)
17
  datasets = {
18
  "stars": load_dataset("open-source-metrics/preprocessed_stars"),
19
  "issues": load_dataset("open-source-metrics/preprocessed_issues"),
20
- "pip": load_dataset("open-source-metrics/pip").sort('day'),
21
  }
22
 
23
  external_datasets = {
24
  "pip": load_dataset("open-source-metrics/pip-external").sort('day'),
25
- "stars": load_dataset("open-source-metrics/stars-external")
 
26
  }
27
 
28
 
@@ -53,23 +54,21 @@ class RequestHandler(SimpleHTTPRequestHandler):
53
  return SimpleHTTPRequestHandler.do_GET(self)
54
 
55
  if self.path.startswith("/initialize"):
56
- dataset_keys = {k: set(v.keys()) for k, v in datasets.items()}
57
- dataset_with_most_splits = max([d for d in dataset_keys.values()], key=len)
 
 
58
 
59
  external_dataset_keys = {k: set(v.keys()) for k, v in external_datasets.items()}
60
  external_dataset_with_most_splits = max([d for d in external_dataset_keys.values()], key=len)
61
 
 
 
 
62
  warnings = []
63
 
64
  print("Initializing ...")
65
 
66
- # for k, v in dataset_keys.items():
67
- # if len(v) < len(dataset_with_most_splits):
68
- # warnings.append(
69
- # f"The {k} dataset does not contain all splits. Missing: {dataset_with_most_splits - v}."
70
- # f"\nSelecting that split to show the pip install numbers will not work."
71
- # )
72
-
73
  for k, v in external_dataset_keys.items():
74
  if len(v) < len(external_dataset_with_most_splits):
75
  warnings.append(
@@ -96,54 +95,35 @@ class RequestHandler(SimpleHTTPRequestHandler):
96
  if self.path.startswith("/retrievePipInstalls"):
97
  errors = []
98
  library_names, options = parse_name_and_options(self.path)
 
 
99
 
100
- if '1' in options:
101
- returned_values = {}
102
- for library_name in library_names:
103
- ds = None
104
- if library_name in datasets['pip']:
105
- ds = datasets['pip'][library_name]
106
- elif library_name in external_datasets['pip']:
107
- ds = external_datasets['pip'][library_name]
108
- else:
109
- errors.append(f"No {library_name} found in internal or external datasets.")
110
-
111
- for i in ds:
112
- if i['day'] in returned_values:
113
- returned_values[i['day']]['Cumulated'] += i['num_downloads']
114
- else:
115
- returned_values[i['day']] = {'Cumulated': i['num_downloads']}
116
-
117
- library_names = ['Cumulated']
118
 
 
 
 
 
 
 
 
 
 
 
 
119
  else:
120
- returned_values = {}
121
- for library_name in library_names:
122
-
123
- if library_name in datasets['pip']:
124
- ds = datasets['pip'][library_name]
125
- elif library_name in external_datasets['pip']:
126
- ds = external_datasets['pip'][library_name]
127
- else:
128
- errors.append(f"No {library_name} found in internal or external datasets for pip.")
129
- return {'errors': errors}
130
-
131
- for i in ds:
132
- if i['day'] in returned_values:
133
- returned_values[i['day']][library_name] = i['num_downloads']
134
- else:
135
- returned_values[i['day']] = {library_name: i['num_downloads']}
136
-
137
- for library_name in library_names:
138
- for i in returned_values.keys():
139
- if library_name not in returned_values[i]:
140
- returned_values[i][library_name] = None
141
-
142
- returned_values = collections.OrderedDict(sorted(returned_values.items()))
143
- output = {l: [k[l] for k in returned_values.values()] for l in library_names}
144
- output['day'] = list(returned_values.keys())
145
-
146
- return self.response(output)
147
 
148
  if self.path.startswith("/retrieveStars"):
149
  library_names, options = parse_name_and_options(self.path)
 
17
  datasets = {
18
  "stars": load_dataset("open-source-metrics/preprocessed_stars"),
19
  "issues": load_dataset("open-source-metrics/preprocessed_issues"),
20
+ "pip": load_dataset("open-source-metrics/preprocessed_pip").sort('day'),
21
  }
22
 
23
  external_datasets = {
24
  "pip": load_dataset("open-source-metrics/pip-external").sort('day'),
25
+ "stars": load_dataset("open-source-metrics/stars-external"),
26
+ "issues": load_dataset("open-source-metrics/issues-external")
27
  }
28
 
29
 
 
54
  return SimpleHTTPRequestHandler.do_GET(self)
55
 
56
  if self.path.startswith("/initialize"):
57
+ dataset_with_most_splits = max(datasets['stars'].column_names.values(), key=len)
58
+
59
+ if 'day' in dataset_with_most_splits:
60
+ dataset_with_most_splits.remove('day')
61
 
62
  external_dataset_keys = {k: set(v.keys()) for k, v in external_datasets.items()}
63
  external_dataset_with_most_splits = max([d for d in external_dataset_keys.values()], key=len)
64
 
65
+ for external in external_dataset_with_most_splits:
66
+ dataset_with_most_splits.remove(external)
67
+
68
  warnings = []
69
 
70
  print("Initializing ...")
71
 
 
 
 
 
 
 
 
72
  for k, v in external_dataset_keys.items():
73
  if len(v) < len(external_dataset_with_most_splits):
74
  warnings.append(
 
95
  if self.path.startswith("/retrievePipInstalls"):
96
  errors = []
97
  library_names, options = parse_name_and_options(self.path)
98
+ cumulated = '1' in options
99
+ week_over_week = '2' in options
100
 
101
+ def sum_of_lists(lists):
102
+ def _sum(items):
103
+ while None in items:
104
+ items.remove(None)
105
+ return sum(items)
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
+ return [_sum(list(a)) for a in zip(*lists)]
108
+
109
+ if week_over_week:
110
+ if cumulated:
111
+ cumulated_dict = {
112
+ 'Cumulated': sum_of_lists([v for k, v in datasets['pip']['wow'].to_dict().items() if k in library_names]),
113
+ 'day': datasets['pip']['wow'].to_dict()['day']
114
+ }
115
+ return self.response(cumulated_dict)
116
+ else:
117
+ return self.response({k: v for k, v in datasets['pip']['wow'].to_dict().items() if k in library_names + ['day']})
118
  else:
119
+ if cumulated:
120
+ cumulated_dict = {
121
+ 'Cumulated': sum_of_lists([v for k, v in datasets['pip']['raw'].to_dict().items() if k in library_names]),
122
+ 'day': datasets['pip']['raw'].to_dict()['day']
123
+ }
124
+ return self.response(cumulated_dict)
125
+ else:
126
+ return self.response({k: v for k, v in datasets['pip']['raw'].to_dict().items() if k in library_names + ['day']})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
  if self.path.startswith("/retrieveStars"):
129
  library_names, options = parse_name_and_options(self.path)
index.js CHANGED
@@ -192,8 +192,11 @@ const initialize = async () => {
192
  }
193
 
194
  addOption('pip', "Cumulated");
 
 
195
  addOption('issue', "Exclude org members");
196
  addOption('issue', "Week over week");
 
197
  addOption('stars', "Week over week");
198
 
199
  const fetchButton = createButton('Fetch', inferJson, () => {
 
192
  }
193
 
194
  addOption('pip', "Cumulated");
195
+ addOption('pip', "Week over week");
196
+
197
  addOption('issue', "Exclude org members");
198
  addOption('issue', "Week over week");
199
+
200
  addOption('stars', "Week over week");
201
 
202
  const fetchButton = createButton('Fetch', inferJson, () => {