Corey Morris committed on
Commit
7f2d984
1 Parent(s): 2f457d8

updated pipeline and init

Browse files
Files changed (1) hide show
  1. details_data_processor.py +22 -7
details_data_processor.py CHANGED
@@ -15,9 +15,16 @@ class DetailsDataProcessor:
15
  # Download
16
  #url example https://huggingface.co/datasets/open-llm-leaderboard/details/resolve/main/64bits/LexPodLM-13B/details_harness%7ChendrycksTest-moral_scenarios%7C5_2023-07-25T13%3A41%3A51.227672.json
17
 
 
 
 
 
18
  def __init__(self, directory='results', pattern='results*.json'):
19
  self.directory = directory
20
  self.pattern = pattern
 
 
 
21
 
22
  def _find_files(self, directory='results', pattern='results*.json'):
23
  matching_files = [] # List to hold matching filenames
@@ -94,12 +101,20 @@ class DetailsDataProcessor:
94
  return url
95
 
96
  def pipeline(self):
97
- dataframes = []
 
98
  file_paths = self._find_files(self.directory, self.pattern)
99
  for file_path in file_paths:
100
- print(file_path)
101
- url = self.generate_url(file_path)
102
- file_path = file_path.split('/')[-1]
103
- df = self.single_file_pipeline(url, file_path)
104
- dataframes.append(df)
105
- return dataframes
 
 
 
 
 
 
 
 
15
  # Download
16
  #url example https://huggingface.co/datasets/open-llm-leaderboard/details/resolve/main/64bits/LexPodLM-13B/details_harness%7ChendrycksTest-moral_scenarios%7C5_2023-07-25T13%3A41%3A51.227672.json
17
 
18
+ # def __init__(self, directory='results', pattern='results*.json'):
19
+ # self.directory = directory
20
+ # self.pattern = pattern
21
+
22
  def __init__(self, directory='results', pattern='results*.json'):
23
  self.directory = directory
24
  self.pattern = pattern
25
+ if not os.path.exists('details_data'):
26
+ os.makedirs('details_data')
27
+
28
 
29
  def _find_files(self, directory='results', pattern='results*.json'):
30
  matching_files = [] # List to hold matching filenames
 
101
  return url
102
 
103
  def pipeline(self):
104
+ error_count = 0
105
+ success_count = 0
106
  file_paths = self._find_files(self.directory, self.pattern)
107
  for file_path in file_paths:
108
+ print(f"Processing file path: {file_path}")
109
+ url = self.build_url(file_path)
110
+ if url:
111
+ errors, successes = self.download_file(url)
112
+ error_count += errors
113
+ success_count += successes
114
+ else:
115
+ print(f"Error building URL for file path: {file_path}")
116
+ error_count += 1
117
+
118
+ print(f"Downloaded {success_count} files successfully. Encountered {error_count} errors.")
119
+ return success_count, error_count
120
+