Alex Cabrera committed on
Commit
84b824e
1 Parent(s): 7253275

new charts

Browse files
evals/.zeno_cache_country-area/reports.pickle CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f72e49e684119b434fba6cbd8369a9ea077f8d05bcba159a8456569de2b7b66
3
  size 3880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebe923fd1e82b53bd95580dff84cfae8e0c37824c57186e218e921a01c397b1c
3
  size 3880
evals/.zeno_cache_crossword/reports.pickle CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c1dc251387b1070c8e8b4ce142f75d26006804580d8c2239a9168913460528b
3
  size 6586
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4335d3a1a4a7ce47c4c9ee39811c090bdeb3a2d565085b411b1e9a471052d705
3
  size 6586
evals/.zeno_cache_med-mcqa/PREDISTILLsubject.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47ada9af3bfaaa3ea9c77bed6774b474d5cf505676302d443c20ace3db98f7a5
3
+ size 11589
evals/.zeno_cache_med-mcqa/folders.pickle CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec0a6ccf9debf1c16781445c4b9106080d00478b0559469336db7c7b7b9711c8
3
- size 5
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c21f58777ece01ce867731803c284f07d64ef69d67712eeca9bf9bda7ac4f38b
3
+ size 29
evals/.zeno_cache_med-mcqa/reports.pickle CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec0a6ccf9debf1c16781445c4b9106080d00478b0559469336db7c7b7b9711c8
3
- size 5
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0150a18fcad53c825bfed305a0a05c6ddc9cc8bd5f8cd70e630ed4d273d51aa4
3
+ size 3033
evals/.zeno_cache_med-mcqa/slices.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4b521b1fc398b7bf3b6be870e597b077110fc2fb2ef61b52b4a204e3b5a45f0
3
+ size 2295
evals/evals.yaml CHANGED
@@ -13,6 +13,7 @@
13
  - med-mcqa:
14
  results-file: ./medmcqa/med-3.5-turbo.jsonl
15
  second-results-file: ./medmcqa/med-4.jsonl
 
16
  link: https://github.com/openai/evals/commit/19b2cf9ff96b08af68f5c3b4d2c90184844a4fe6
17
  description: Multiple choice questions from different medical areas.
18
  - aba-mrpc:
 
13
  - med-mcqa:
14
  results-file: ./medmcqa/med-3.5-turbo.jsonl
15
  second-results-file: ./medmcqa/med-4.jsonl
16
+ functions-file: ./medmcqa/med_fns.py
17
  link: https://github.com/openai/evals/commit/19b2cf9ff96b08af68f5c3b4d2c90184844a4fe6
18
  description: Multiple choice questions from different medical areas.
19
  - aba-mrpc:
evals/medmcqa/med_fns.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+ from zeno import DistillReturn, distill
4
+
5
+ finder = "Subject:(.*)"
6
+
7
+
8
+ @distill
9
+ def subject(df, ops):
10
+ ret_subjs = []
11
+ for entry in df[ops.data_column]:
12
+ ret_subjs.append(re.search(finder, entry[1]["content"]).group(1))
13
+
14
+ return DistillReturn(distill_output=ret_subjs)
zeno-evals-hub/main.py CHANGED
@@ -88,7 +88,6 @@ def command_line():
88
  config.editable = False
89
 
90
  zeno_obj = zeno(config)
91
- print(config.metadata["expected"])
92
  if zeno_obj is None:
93
  sys.exit(1)
94
  server = get_server(zeno_obj)
 
88
  config.editable = False
89
 
90
  zeno_obj = zeno(config)
 
91
  if zeno_obj is None:
92
  sys.exit(1)
93
  server = get_server(zeno_obj)