Corey Morris committed on
Commit
25b87bf
1 Parent(s): 0a77c60

using URL as file name

Browse files
Files changed (1) hide show
  1. details_data_processor.py +14 -15
details_data_processor.py CHANGED
@@ -28,28 +28,27 @@ class DetailsDataProcessor:
28
  matching_files.append(filename) # Append the matching filename to the list
29
  return matching_files # Return the list of matching filenames
30
 
 
31
  @staticmethod
32
- def download_file(url, save_file_path):
33
- #TODO: I may not need to save the file. I can just read it in and convert to a dataframe
34
- # Get the current date and time
35
- error_count = 0
36
- success_count = 0
37
- # timestamp = datetime.now()
38
 
39
- # Format the timestamp as a string, suitable for use in a filename
40
- # filename_timestamp = timestamp.strftime("%Y-%m-%dT%H-%M-%S")
41
 
42
- # Generate a unique UUID
43
- unique_id = uuid.uuid4()
44
 
45
- # Append the UUID to the filename
46
- save_file_path = save_file_path + "_" + str(unique_id) + ".json"
47
 
 
 
48
  try:
49
  # Sending a GET request
50
  r = requests.get(url, allow_redirects=True)
51
- r.raise_for_status() # Raises an HTTPError if the HTTP request returned an unsuccessful status code
52
-
53
  # Writing the content to the specified file
54
  with open(save_file_path, 'wb') as file:
55
  file.write(r.content)
@@ -63,8 +62,8 @@ class DetailsDataProcessor:
63
  error_count += 1
64
  except Exception as e:
65
  error_count += 1
66
- return error_count, success_count
67
 
 
68
 
69
 
70
  @staticmethod
 
28
  matching_files.append(filename) # Append the matching filename to the list
29
  return matching_files # Return the list of matching filenames
30
 
31
+
32
  @staticmethod
33
+ def download_file(url, directory='details_data'):
34
+ # Define the prefix to be removed from the URL
35
+ url_prefix = "https://huggingface.co/datasets/open-llm-leaderboard/details/resolve/main/"
 
 
 
36
 
37
+ # Remove the prefix from the URL
38
+ file_name_part = url.replace(url_prefix, '')
39
 
40
+ # Replace characters that don't play nice with file systems
41
+ safe_file_name = re.sub(r'[<>:"/\\|?*]', '_', file_name_part) # Replace with '_'
42
 
43
+ save_file_path = os.path.join(directory, safe_file_name)
 
44
 
45
+ error_count = 0
46
+ success_count = 0
47
  try:
48
  # Sending a GET request
49
  r = requests.get(url, allow_redirects=True)
50
+ r.raise_for_status()
51
+
52
  # Writing the content to the specified file
53
  with open(save_file_path, 'wb') as file:
54
  file.write(r.content)
 
62
  error_count += 1
63
  except Exception as e:
64
  error_count += 1
 
65
 
66
+ return error_count, success_count
67
 
68
 
69
  @staticmethod