nkasmanoff committed on
Commit
55ee8cd
1 Parent(s): 06758b6

Create helpers.py

Browse files
Files changed (1) hide show
  1. helpers.py +50 -0
helpers.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+
3
def clean_up_tags(tags_list):
    """Strip the ``prefix:`` part from each tag and join the results.

    Args:
        tags_list: Iterable of tag strings. A tag containing ``:`` is reduced
            to the text after its first colon; other tags are kept as-is.
            ``None`` or an empty iterable yields an empty string.

    Returns:
        str: The cleaned tags joined with ``", "``.
    """
    if not tags_list:
        return ""

    tags_cleaned = []
    for tag in tags_list:
        # Split on the FIRST colon only, so a value that itself contains a
        # colon (e.g. "key:a:b" -> "a:b") is not truncated.
        _, separator, value = tag.partition(':')
        tags_cleaned.append(value if separator else tag)

    return ", ".join(tags_cleaned)
12
+
13
+
14
+
15
def check_api_url(url):
    """
    Ensure "/api" is present in the URL between ".co" and "/datasets".

    The URL is examined at the first ".co" and, after it, the first
    "/datasets". If "/api" does not already appear between those two markers,
    it is inserted directly after ".co". A URL that lacks either marker is
    returned unchanged instead of raising.

    Args:
        url (str): A URL string

    Returns:
        str: A URL string with "/api" inserted if necessary
    """
    # partition() splits on the first occurrence only, so later ".co" or
    # "/datasets" substrings in the path (e.g. "my.corpus") are preserved.
    host_part, domain_marker, remainder = url.partition(".co")
    if not domain_marker:
        return url

    before_datasets, datasets_marker, after_datasets = remainder.partition("/datasets")
    if not datasets_marker:
        # Nothing to anchor the insertion on; leave the URL as it is.
        return url

    middle_part = "" if "/api" in before_datasets else "/api"
    return (
        host_part + domain_marker + middle_part
        + before_datasets + datasets_marker + after_datasets
    )
36
+
37
+
38
+
39
def get_dataset_metadata(dataset_url, timeout=10):
    """Fetch a dataset's metadata and return a subset of its fields.

    The URL is first passed through ``check_api_url`` and then requested
    with an HTTP GET.

    Args:
        dataset_url (str): URL of the dataset.
        timeout (float | tuple | None): Timeout passed to ``requests.get``.
            Defaults to 10 so an unresponsive server cannot block the caller
            indefinitely; pass ``None`` to wait without limit.

    Returns:
        dict: The ``'id'``, ``'description'`` and ``'tags'`` entries that are
        present in the JSON response. Empty when the response status is not
        200 or none of those keys are present.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` on network failure or timeout.
    """
    dataset_url = check_api_url(dataset_url)
    response = requests.get(dataset_url, timeout=timeout)
    if response.status_code != 200:
        return {}

    response_json = response.json()
    keys_to_retrieve = ('id', 'description', 'tags')
    return {
        key: response_json[key]
        for key in keys_to_retrieve
        if key in response_json
    }