LysandreJik committed on
Commit
9ac5ea2
1 Parent(s): ffd3765
Files changed (1) hide show
  1. app.py +17 -8
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
  from datetime import datetime, timedelta
3
  import json
@@ -16,7 +17,7 @@ HfFolder.save_token(HF_TOKEN)
16
  datasets = {
17
  # "stars": load_dataset("open-source-metrics/stars"),
18
  "issues": load_dataset("open-source-metrics/issues"),
19
- "pip": load_dataset("open-source-metrics/pip")
20
  }
21
 
22
 
@@ -49,6 +50,8 @@ class RequestHandler(SimpleHTTPRequestHandler):
49
  self.end_headers()
50
 
51
  # TODO: Send and display warnings
 
 
52
  self.wfile.write(json.dumps(list(dataset_with_most_splits)).encode("utf-8"))
53
 
54
  return SimpleHTTPRequestHandler
@@ -61,8 +64,7 @@ class RequestHandler(SimpleHTTPRequestHandler):
61
 
62
  returned_values = {}
63
  for library_name in library_names:
64
- dataset = load_dataset(f"open-source-metrics/{library_name}-pip-installs", use_auth_token=True)['train']
65
- for i in dataset:
66
  if i['day'] in returned_values:
67
  returned_values[i['day']][library_name] = i['num_downloads']
68
  else:
@@ -71,10 +73,11 @@ class RequestHandler(SimpleHTTPRequestHandler):
71
  for library_name in library_names:
72
  for i in returned_values.keys():
73
  if library_name not in returned_values[i]:
74
- returned_values[i][library_name] = 0
75
 
76
- output = {l: [k[l] for k in returned_values.values()][::-1] for l in library_names}
77
- output['day'] = list(returned_values.keys())[::-1]
 
78
 
79
  self.send_response(200)
80
  self.send_header("Content-Type", "application/json")
@@ -91,12 +94,17 @@ class RequestHandler(SimpleHTTPRequestHandler):
91
  library_names = library_names.split(',')
92
 
93
  returned_values = {}
94
- dataset_dict = load_dataset(f"open-source-metrics/stars", use_auth_token=True, revision='90cb31b2db73c8c4291bcf317d831595e4fb2a91').sort('dates')
95
 
96
  for library_name in library_names:
97
  dataset = dataset_dict[library_name]
 
98
  n = 0
99
- for i in dataset:
 
 
 
 
100
  n += 1
101
  if i['dates'] in returned_values:
102
  returned_values[i['dates']][library_name] = n
@@ -108,6 +116,7 @@ class RequestHandler(SimpleHTTPRequestHandler):
108
  if library_name not in returned_values[i]:
109
  returned_values[i][library_name] = None
110
 
 
111
  output = {l: [k[l] for k in returned_values.values()][::-1] for l in library_names}
112
  output['day'] = list(returned_values.keys())[::-1]
113
 
 
1
+ import collections
2
  import os
3
  from datetime import datetime, timedelta
4
  import json
 
17
  datasets = {
18
  # "stars": load_dataset("open-source-metrics/stars"),
19
  "issues": load_dataset("open-source-metrics/issues"),
20
+ "pip": load_dataset("open-source-metrics/pip").sort('day')
21
  }
22
 
23
 
 
50
  self.end_headers()
51
 
52
  # TODO: Send and display warnings
53
+ dataset_with_most_splits = list(dataset_with_most_splits)
54
+ dataset_with_most_splits.sort()
55
  self.wfile.write(json.dumps(list(dataset_with_most_splits)).encode("utf-8"))
56
 
57
  return SimpleHTTPRequestHandler
 
64
 
65
  returned_values = {}
66
  for library_name in library_names:
67
+ for i in datasets['pip'][library_name]:
 
68
  if i['day'] in returned_values:
69
  returned_values[i['day']][library_name] = i['num_downloads']
70
  else:
 
73
  for library_name in library_names:
74
  for i in returned_values.keys():
75
  if library_name not in returned_values[i]:
76
+ returned_values[i][library_name] = None
77
 
78
+ returned_values = collections.OrderedDict(sorted(returned_values.items()))
79
+ output = {l: [k[l] for k in returned_values.values()] for l in library_names}
80
+ output['day'] = list(returned_values.keys())
81
 
82
  self.send_response(200)
83
  self.send_header("Content-Type", "application/json")
 
94
  library_names = library_names.split(',')
95
 
96
  returned_values = {}
97
+ dataset_dict = load_dataset(f"open-source-metrics/stars", use_auth_token=True).sort('dates')
98
 
99
  for library_name in library_names:
100
  dataset = dataset_dict[library_name]
101
+
102
  n = 0
103
+ for k, i in enumerate(dataset):
104
+ # Decimate values if there are too many
105
+ if len(dataset) > 1000 and k % int(len(dataset) / 1000) != 0:
106
+ continue
107
+
108
  n += 1
109
  if i['dates'] in returned_values:
110
  returned_values[i['dates']][library_name] = n
 
116
  if library_name not in returned_values[i]:
117
  returned_values[i][library_name] = None
118
 
119
+ returned_values = collections.OrderedDict(sorted(returned_values.items()))
120
  output = {l: [k[l] for k in returned_values.values()][::-1] for l in library_names}
121
  output['day'] = list(returned_values.keys())[::-1]
122