Corey Morris committed on
Commit
cc32c4f
1 Parent(s): f228d38

Build URL from file path is working

Browse files
details_data_processor.py CHANGED
@@ -5,6 +5,7 @@ import json
5
  import re
6
  import numpy as np
7
  import requests
 
8
 
9
  class DetailsDataProcessor:
10
  # Download
@@ -42,25 +43,15 @@ class DetailsDataProcessor:
42
  return df
43
 
44
  @staticmethod
45
def generate_url(file_path):
    """Build the details-dataset URL that corresponds to a results file path.

    The organization, model and timestamp are taken from ``file_path``
    instead of being hard-coded. The path must end in
    ``<organization>/<model>/<prefix>_<timestamp>``, for example
    ``64bits/LexPodLM-13B/results_2023-07-25T13:41:51.227672.json``; any
    leading directories are ignored.

    Args:
        file_path: Slash-separated path to a results file.

    Returns:
        The URL of the ``hendrycksTest-moral_scenarios`` details file for the
        same organization, model and timestamp, with the timestamp portion
        percent-encoded.

    Raises:
        ValueError: If the path has fewer than three segments or the file
            name has no ``_<timestamp>`` part.
    """
    # Local import: the surrounding module does not import urllib.parse.
    from urllib.parse import quote

    base_url = 'https://huggingface.co/datasets/open-llm-leaderboard/details/resolve/main/'
    other_chunk = 'details_harness%7ChendrycksTest-moral_scenarios%7C5'

    segments = [part for part in file_path.split('/') if part]
    if len(segments) < 3:
        raise ValueError(
            f'expected <organization>/<model>/<file> in path, got: {file_path!r}'
        )
    organization, model, results_name = segments[-3:]

    # Keep everything after the first underscore (timestamp plus extension).
    _, underscore, timestamp = results_name.partition('_')
    if not underscore or not timestamp:
        raise ValueError(f'no timestamp found in file name: {results_name!r}')

    # safe="" so that ':' in the timestamp is encoded as %3A.
    filename = '_' + quote(timestamp, safe='')
    return base_url + organization + '/' + model + '/' + other_chunk + filename
60
-
61
-
62
-
63
-
64
 
65
  def pipeline(self):
66
  dataframes = []
 
5
  import re
6
  import numpy as np
7
  import requests
8
+ from urllib.parse import quote
9
 
10
  class DetailsDataProcessor:
11
  # Download
 
43
  return df
44
 
45
  @staticmethod
46
def build_url(file_path):
    """Build the details-dataset download URL for a results file path.

    The path must end in ``<organization>/<model>/<prefix>_<timestamp>``,
    for example
    ``results/64bits/LexPodLM-13B/results_2023-07-25T13:41:51.227672.json``.
    Only the last three segments are used, so the number of leading
    directories does not matter.

    Args:
        file_path: Slash-separated path to a results file.

    Returns:
        The URL of the ``hendrycksTest-moral_scenarios`` details file for the
        same organization, model and timestamp, with the timestamp portion
        percent-encoded.

    Raises:
        ValueError: If the path has fewer than three segments or the file
            name has no ``_<timestamp>`` part.
    """
    segments = [part for part in file_path.split('/') if part]
    if len(segments) < 3:
        raise ValueError(
            f'expected <organization>/<model>/<file> in path, got: {file_path!r}'
        )
    organization, model_name, results_name = segments[-3:]

    # Split on the first underscore only, so the full remainder of the file
    # name (timestamp plus extension) is kept even if it contains underscores.
    _, underscore, timestamp = results_name.partition('_')
    if not underscore or not timestamp:
        raise ValueError(f'no timestamp found in file name: {results_name!r}')

    # safe="" so that ':' in the timestamp is encoded as %3A.
    encoded_timestamp = quote(timestamp, safe='')
    return (
        'https://huggingface.co/datasets/open-llm-leaderboard/details/resolve/main/'
        f'{organization}/{model_name}/'
        f'details_harness%7ChendrycksTest-moral_scenarios%7C5_{encoded_timestamp}'
    )
 
 
 
 
 
 
 
 
 
 
55
 
56
  def pipeline(self):
57
  dataframes = []
test_details_data_processing.py CHANGED
@@ -20,13 +20,16 @@ class TestDetailsDataProcessor(unittest.TestCase):
20
  self.assertTrue(os.path.exists('test.html'))
21
  os.remove('test.html')
22
 
23
def test_generate_url(self):
    """generate_url returns the known details URL for a sample results path."""
    sample_path = "64bits/LexPodLM-13B/results_2023-07-25T13:41:51.227672.json"
    wanted = 'https://huggingface.co/datasets/open-llm-leaderboard/details/resolve/main/64bits/LexPodLM-13B/details_harness%7ChendrycksTest-moral_scenarios%7C5_2023-07-25T13%3A41%3A51.227672.json'

    self.assertEqual(wanted, self.processor.generate_url(sample_path))
 
 
 
30
 
31
  def test_pipeline(self):
32
  df = self.processor.pipeline()
 
20
  self.assertTrue(os.path.exists('test.html'))
21
  os.remove('test.html')
22
 
23
def test_build_url(self):
    """build_url maps each results file path to its expected details URL."""
    test_cases = [
        ('results/64bits/LexPodLM-13B/results_2023-07-25T13:41:51.227672.json',
         'https://huggingface.co/datasets/open-llm-leaderboard/details/resolve/main/64bits/LexPodLM-13B/details_harness%7ChendrycksTest-moral_scenarios%7C5_2023-07-25T13%3A41%3A51.227672.json'),
        ('results/AlpinDale/pygmalion-instruct/results_2023-08-17T11:20:15.687659.json',
         'https://huggingface.co/datasets/open-llm-leaderboard/details/resolve/main/AlpinDale/pygmalion-instruct/details_harness%7ChendrycksTest-moral_scenarios%7C5_2023-08-17T11%3A20%3A15.687659.json'),
    ]

    for file_path, expected in test_cases:
        # subTest reports every failing case instead of stopping at the first,
        # and assertEqual is not stripped when Python runs with -O.
        with self.subTest(file_path=file_path):
            self.assertEqual(expected, self.processor.build_url(file_path))
33
 
34
  def test_pipeline(self):
35
  df = self.processor.pipeline()