Corey Morris committed on
Commit 38d88f9
1 Parent(s): 25b87bf

shortened file name

Files changed (1)
  1. details_data_processor.py +47 -7
details_data_processor.py CHANGED
@@ -29,17 +29,55 @@ class DetailsDataProcessor:
         return matching_files  # Return the list of matching filenames
 
 
+    # @staticmethod
+    # def download_file(url, directory='details_data'):
+    #     # Define the prefix to be removed from the URL
+    #     url_prefix = "https://huggingface.co/datasets/open-llm-leaderboard/details/resolve/main/"
+
+    #     # Remove the prefix from the URL
+    #     file_name_part = url.replace(url_prefix, '')
+
+    #     # Replace characters that don't play nice with file systems
+    #     safe_file_name = re.sub(r'[<>:"/\\|?*]', '_', file_name_part)  # Replace with '_'
+
+    #     save_file_path = os.path.join(directory, safe_file_name)
+
+    #     error_count = 0
+    #     success_count = 0
+    #     try:
+    #         # Sending a GET request
+    #         r = requests.get(url, allow_redirects=True)
+    #         r.raise_for_status()
+
+    #         # Writing the content to the specified file
+    #         with open(save_file_path, 'wb') as file:
+    #             file.write(r.content)
+
+    #         success_count += 1
+    #     except requests.ConnectionError as e:
+    #         error_count += 1
+    #     except requests.HTTPError as e:
+    #         error_count += 1
+    #     except FileNotFoundError as e:
+    #         error_count += 1
+    #     except Exception as e:
+    #         error_count += 1
+
+    #     return error_count, success_count
+
+
     @staticmethod
     def download_file(url, directory='details_data'):
-        # Define the prefix to be removed from the URL
-        url_prefix = "https://huggingface.co/datasets/open-llm-leaderboard/details/resolve/main/"
+        # Extract relevant parts from the URL
+        segments = url.split('/')
+        organization = segments[-3]
+        model_name = segments[-2]
+        task = segments[-1].split('_')[0]  # Assuming task is part of the last segment
 
-        # Remove the prefix from the URL
-        file_name_part = url.replace(url_prefix, '')
-
-        # Replace characters that don't play nice with file systems
-        safe_file_name = re.sub(r'[<>:"/\\|?*]', '_', file_name_part)  # Replace with '_'
+        # Construct the filename
+        safe_file_name = f"{organization}_{model_name}_{task}.json"
 
+        # Create the full save file path
         save_file_path = os.path.join(directory, safe_file_name)
 
         error_count = 0
@@ -52,6 +90,7 @@ class DetailsDataProcessor:
             # Writing the content to the specified file
             with open(save_file_path, 'wb') as file:
                 file.write(r.content)
+            print(save_file_path)
 
             success_count += 1
         except requests.ConnectionError as e:
@@ -66,6 +105,7 @@ class DetailsDataProcessor:
         return error_count, success_count
 
 
+
     @staticmethod
     def single_file_pipeline(url, filename):
        DetailsDataProcessor.download_file(url, filename)
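
For reference, a minimal sketch of what the shortened naming scheme in the new download_file produces. The URL below is a hypothetical example, not taken from this commit; it only assumes that a leaderboard details URL ends in organization/model/file segments, as the new code expects.

# Sketch of the shortened-filename logic introduced in this commit.
# The URL is a made-up example; real leaderboard URLs may differ.
url = ("https://huggingface.co/datasets/open-llm-leaderboard/details/resolve/main/"
       "some-org/some-model/details_harness_task_2023.json")

segments = url.split('/')
organization = segments[-3]        # "some-org"
model_name = segments[-2]          # "some-model"
task = segments[-1].split('_')[0]  # "details" (first underscore-separated piece of the file name)

safe_file_name = f"{organization}_{model_name}_{task}.json"
print(safe_file_name)  # -> some-org_some-model_details.json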