Corey Morris committed on
Commit 38d88f9
1 Parent(s): 25b87bf

shortened file name

Files changed (1)
  1. details_data_processor.py +47 -7
details_data_processor.py CHANGED
@@ -29,17 +29,55 @@ class DetailsDataProcessor:
         return matching_files  # Return the list of matching filenames
 
 
+    # @staticmethod
+    # def download_file(url, directory='details_data'):
+    #     # Define the prefix to be removed from the URL
+    #     url_prefix = "https://huggingface.co/datasets/open-llm-leaderboard/details/resolve/main/"
+
+    #     # Remove the prefix from the URL
+    #     file_name_part = url.replace(url_prefix, '')
+
+    #     # Replace characters that don't play nice with file systems
+    #     safe_file_name = re.sub(r'[<>:"/\\|?*]', '_', file_name_part)  # Replace with '_'
+
+    #     save_file_path = os.path.join(directory, safe_file_name)
+
+    #     error_count = 0
+    #     success_count = 0
+    #     try:
+    #         # Sending a GET request
+    #         r = requests.get(url, allow_redirects=True)
+    #         r.raise_for_status()
+
+    #         # Writing the content to the specified file
+    #         with open(save_file_path, 'wb') as file:
+    #             file.write(r.content)
+
+    #         success_count += 1
+    #     except requests.ConnectionError as e:
+    #         error_count += 1
+    #     except requests.HTTPError as e:
+    #         error_count += 1
+    #     except FileNotFoundError as e:
+    #         error_count += 1
+    #     except Exception as e:
+    #         error_count += 1
+
+    #     return error_count, success_count
+
+
     @staticmethod
     def download_file(url, directory='details_data'):
-        # Define the prefix to be removed from the URL
-        url_prefix = "https://huggingface.co/datasets/open-llm-leaderboard/details/resolve/main/"
+        # Extract relevant parts from the URL
+        segments = url.split('/')
+        organization = segments[-3]
+        model_name = segments[-2]
+        task = segments[-1].split('_')[0]  # Assuming task is part of the last segment
 
-        # Remove the prefix from the URL
-        file_name_part = url.replace(url_prefix, '')
-
-        # Replace characters that don't play nice with file systems
-        safe_file_name = re.sub(r'[<>:"/\\|?*]', '_', file_name_part)  # Replace with '_'
+        # Construct the filename
+        safe_file_name = f"{organization}_{model_name}_{task}.json"
 
+        # Create the full save file path
         save_file_path = os.path.join(directory, safe_file_name)
 
         error_count = 0
@@ -52,6 +90,7 @@ class DetailsDataProcessor:
             # Writing the content to the specified file
             with open(save_file_path, 'wb') as file:
                 file.write(r.content)
+            print(save_file_path)
 
             success_count += 1
         except requests.ConnectionError as e:
@@ -66,6 +105,7 @@ class DetailsDataProcessor:
         return error_count, success_count
 
 
+
     @staticmethod
     def single_file_pipeline(url, filename):
        DetailsDataProcessor.download_file(url, filename)
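
For reference, a minimal sketch of what the shortened naming scheme in the new download_file produces. The URL below is a hypothetical example, not taken from this commit; it only assumes that a leaderboard details URL ends in organization/model/file segments, as the new code expects.

# Sketch of the shortened-filename logic introduced in this commit.
# The URL is a made-up example; real leaderboard URLs may differ.
url = ("https://huggingface.co/datasets/open-llm-leaderboard/details/resolve/main/"
       "some-org/some-model/details_harness_task_2023.json")

segments = url.split('/')
organization = segments[-3]        # "some-org"
model_name = segments[-2]          # "some-model"
task = segments[-1].split('_')[0]  # "details" (first underscore-separated piece of the file name)

safe_file_name = f"{organization}_{model_name}_{task}.json"
print(safe_file_name)  # -> some-org_some-model_details.json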