taesiri committed
Commit e0656c6 • 1 Parent(s): ce7f029
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. .gitattributes +40 -0
  2. app.py +83 -20
  3. results-cot/Mixtral-8x7B-Instruct-v0.1.csv +3 -0
  4. results-cot/{gpt-4v-CoT-Azure.csv → Mixtral-8x7B-Instruct-v0.1.jpg} +2 -2
  5. results-cot/Mixtral-8x7B-Instruct-v0.1.pkl +3 -0
  6. results-cot/{gpt-4v-CoT-Azure.jpg → Mixtral-8x7B-Instruct-v0.1.png} +2 -2
  7. results-cot/Qwen1.5-72B-Chat.csv +3 -0
  8. results-cot/{gpt-4v-CoT-Azure.pkl → Qwen1.5-72B-Chat.jpg} +2 -2
  9. results-cot/Qwen1.5-72B-Chat.pkl +3 -0
  10. results-cot/{gpt-4v-CoT-Azure.png → Qwen1.5-72B-Chat.png} +2 -2
  11. results-cot/gemma-7b-it.csv +3 -0
  12. results-cot/gemma-7b-it.jpg +3 -0
  13. results-cot/gemma-7b-it.pkl +3 -0
  14. results-cot/gemma-7b-it.png +3 -0
  15. results-cot/{gpt-3.5-CoT.csv → gpt-3.5-turbo-0125.csv} +0 -0
  16. results-cot/{gpt-3.5-CoT.jpg → gpt-3.5-turbo-0125.jpg} +0 -0
  17. results-cot/{gpt-3.5-CoT.pkl → gpt-3.5-turbo-0125.pkl} +0 -0
  18. results-cot/{gpt-3.5-CoT.png → gpt-3.5-turbo-0125.png} +0 -0
  19. results-vision-CoT/gemini-pro-vision-CoT.csv +0 -3
  20. results-vision-CoT/gemini-pro-vision-CoT.jpg +0 -3
  21. results-vision-CoT/gemini-pro-vision-CoT.pkl +0 -3
  22. results-vision-CoT/gemini-pro-vision-CoT.png +0 -3
  23. results-vision/gemini-pro-vision-CoT.csv +0 -3
  24. results-vision/gemini-pro-vision-CoT.jpg +0 -3
  25. results-vision/gemini-pro-vision-CoT.pkl +0 -3
  26. results-vision/gemini-pro-vision-CoT.png +0 -3
  27. results-vision/gpt-4v-CoT.csv +0 -3
  28. results-vision/gpt-4v-CoT.jpg +0 -3
  29. results-vision/gpt-4v-CoT.pkl +0 -3
  30. results-vision/gpt-4v-CoT.png +0 -3
  31. results/CodeLlama-70b-Instruct-hf.csv +3 -0
  32. results/{CodeLlama-70B.jpg → CodeLlama-70b-Instruct-hf.jpg} +0 -0
  33. results/{CodeLlama-70B.pkl → CodeLlama-70b-Instruct-hf.pkl} +0 -0
  34. results/{CodeLlama-70B.png → CodeLlama-70b-Instruct-hf.png} +0 -0
  35. results/Llama-2-70b-chat-hf.csv +3 -0
  36. results/Mistral-7B-Instruct-v0.2.csv +3 -0
  37. results/Mixtral-8x7B-Instruct-v0.1.csv +3 -0
  38. results/{Mixtral-8x7B-Instruct-0.1.jpg → Mixtral-8x7B-Instruct-v0.1.jpg} +0 -0
  39. results/{Mixtral-8x7B-Instruct-0.1.pkl → Mixtral-8x7B-Instruct-v0.1.pkl} +0 -0
  40. results/{Mixtral-8x7B-Instruct-0.1.png → Mixtral-8x7B-Instruct-v0.1.png} +0 -0
  41. results/Qwen1.5-72B-Chat.csv +3 -0
  42. results/StripedHyena-Nous-7B.csv +3 -0
  43. results/Yi-34B-Chat.csv +3 -0
  44. results/claude-3-haiku-20240307.csv +3 -0
  45. results/{Claude-3-Haiku.jpg → claude-3-haiku-20240307.jpg} +0 -0
  46. results/{Claude-3-Haiku.pkl → claude-3-haiku-20240307.pkl} +0 -0
  47. results/{Claude-3-Haiku.png → claude-3-haiku-20240307.png} +0 -0
  48. results/claude-3-opus-20240229.csv +3 -0
  49. results/{Claude-3-Opus.jpg → claude-3-opus-20240229.jpg} +0 -0
  50. results/{Claude-3-Opus.pkl → claude-3-opus-20240229.pkl} +0 -0
.gitattributes CHANGED
@@ -115,3 +115,43 @@ results-cot/gpt-4v-CoT-Azure.pkl filter=lfs diff=lfs merge=lfs -text
  results-cot/gpt-4v-CoT-Azure.csv filter=lfs diff=lfs merge=lfs -text
  results-vision-CoT/gemini-pro-vision-CoT.csv filter=lfs diff=lfs merge=lfs -text
  results-cot/gpt-3.5-CoT.csv filter=lfs diff=lfs merge=lfs -text
+ results/claude-3-haiku-20240307.csv filter=lfs diff=lfs merge=lfs -text
+ results/claude-3-opus-20240229.csv filter=lfs diff=lfs merge=lfs -text
+ results-cot/gemma-7b-it.csv filter=lfs diff=lfs merge=lfs -text
+ results-cot/gpt-3.5-turbo-0125.csv filter=lfs diff=lfs merge=lfs -text
+ results/gpt-3.5-turbo-0125.csv filter=lfs diff=lfs merge=lfs -text
+ results-cot/Mixtral-8x7B-Instruct-v0.1.csv filter=lfs diff=lfs merge=lfs -text
+ results/gemma-7b-it.csv filter=lfs diff=lfs merge=lfs -text
+ results-cot/Qwen1.5-72B-Chat.csv filter=lfs diff=lfs merge=lfs -text
+ results/CodeLlama-70b-Instruct-hf.csv filter=lfs diff=lfs merge=lfs -text
+ results/Mixtral-8x7B-Instruct-v0.1.csv filter=lfs diff=lfs merge=lfs -text
+ results-cot/gemma-7b-it.pkl filter=lfs diff=lfs merge=lfs -text
+ results/claude-3-haiku-20240307.pkl filter=lfs diff=lfs merge=lfs -text
+ results/gemma-7b-it.pkl filter=lfs diff=lfs merge=lfs -text
+ results-cot/gpt-3.5-turbo-0125.pkl filter=lfs diff=lfs merge=lfs -text
+ results-cot/Mixtral-8x7B-Instruct-v0.1.pkl filter=lfs diff=lfs merge=lfs -text
+ results/Mixtral-8x7B-Instruct-v0.1.pkl filter=lfs diff=lfs merge=lfs -text
+ results/claude-3-opus-20240229.pkl filter=lfs diff=lfs merge=lfs -text
+ results-cot/Qwen1.5-72B-Chat.pkl filter=lfs diff=lfs merge=lfs -text
+ results/CodeLlama-70b-Instruct-hf.pkl filter=lfs diff=lfs merge=lfs -text
+ results/gpt-3.5-turbo-0125.pkl filter=lfs diff=lfs merge=lfs -text
+ results/claude-3-haiku-20240307.jpg filter=lfs diff=lfs merge=lfs -text
+ results/claude-3-opus-20240229.jpg filter=lfs diff=lfs merge=lfs -text
+ results/gpt-3.5-turbo-0125.jpg filter=lfs diff=lfs merge=lfs -text
+ results-cot/gpt-3.5-turbo-0125.jpg filter=lfs diff=lfs merge=lfs -text
+ results/Mixtral-8x7B-Instruct-v0.1.jpg filter=lfs diff=lfs merge=lfs -text
+ results-cot/Qwen1.5-72B-Chat.jpg filter=lfs diff=lfs merge=lfs -text
+ results/gemma-7b-it.jpg filter=lfs diff=lfs merge=lfs -text
+ results-cot/Mixtral-8x7B-Instruct-v0.1.jpg filter=lfs diff=lfs merge=lfs -text
+ results-cot/gemma-7b-it.jpg filter=lfs diff=lfs merge=lfs -text
+ results/CodeLlama-70b-Instruct-hf.jpg filter=lfs diff=lfs merge=lfs -text
+ results-cot/gemma-7b-it.png filter=lfs diff=lfs merge=lfs -text
+ results-cot/gpt-3.5-turbo-0125.png filter=lfs diff=lfs merge=lfs -text
+ results/gpt-3.5-turbo-0125.png filter=lfs diff=lfs merge=lfs -text
+ results/CodeLlama-70b-Instruct-hf.png filter=lfs diff=lfs merge=lfs -text
+ results/Mixtral-8x7B-Instruct-v0.1.png filter=lfs diff=lfs merge=lfs -text
+ results/claude-3-opus-20240229.png filter=lfs diff=lfs merge=lfs -text
+ results-cot/Mixtral-8x7B-Instruct-v0.1.png filter=lfs diff=lfs merge=lfs -text
+ results-cot/Qwen1.5-72B-Chat.png filter=lfs diff=lfs merge=lfs -text
+ results/claude-3-haiku-20240307.png filter=lfs diff=lfs merge=lfs -text
+ results/gemma-7b-it.png filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -10,7 +10,7 @@ vision_results = glob("results-vision/*.pkl")
  # Load CoT text benchmark results
  cot_text_results = glob("results-cot/*.pkl")
  # Load CoT vision benchmark results
- cot_vision_results = glob("results-vision-CoT/*.pkl")
+ # cot_vision_results = glob("results-vision-CoT/*.pkl")
  
  # Function to load data, add model type and name
  def load_data(files, model_type):
@@ -27,18 +27,22 @@ def load_data(files, model_type):
  data = load_data(csv_results, "Text Only")
  vision_data = load_data(vision_results, "Vision")
  cot_text_data = load_data(cot_text_results, "CoT Text Only")
- cot_vision_data = load_data(cot_vision_results, "CoT Vision")
+ # cot_vision_data = load_data(cot_vision_results, "CoT Vision")
  
  # Combine all data into a single DataFrame
  all_data = pd.concat(
- [data, vision_data, cot_text_data, cot_vision_data], ignore_index=True
+ [data, vision_data, cot_text_data], ignore_index=True
  )
  
  all_model_names = all_data["Model Name"].unique()
  all_text_only_model_names = list(
  all_data[all_data["Model Type"] == "Text Only"]["Model Name"].unique()
  )
- print(all_text_only_model_names)
+ all_cot_text_only_models = list(
+ all_data[all_data["Model Type"] == "CoT Text Only"]["Model Name"].unique()
+ )
+
+
  
  ## Continue with the cold code --
  # TODO: Update me to read from all_data for later
@@ -50,7 +54,7 @@ vision_data = {file: pd.read_pickle(file) for file in vision_results}
  # Load the CoT text files into a dict
  cot_text_data = {file: pd.read_pickle(file) for file in cot_text_results}
  # Load the CoT vision files into a dict
- cot_vision_data = {file: pd.read_pickle(file) for file in cot_vision_results}
+ # cot_vision_data = {file: pd.read_pickle(file) for file in cot_vision_results}
  
  
  def calculate_accuracy(df):
@@ -96,13 +100,13 @@ def process_data(data):
  text_data_for_df = process_data(data)
  vision_data_for_df = process_data(vision_data)
  cot_text_data_for_df = process_data(cot_text_data)
- cot_vision_data_for_df = process_data(cot_vision_data)
+ # cot_vision_data_for_df = process_data(cot_vision_data)
  
  # Create DataFrames
  accuracy_df = pd.DataFrame(text_data_for_df, columns=column_names)
  vision_accuracy_df = pd.DataFrame(vision_data_for_df, columns=column_names)
  cot_text_accuracy_df = pd.DataFrame(cot_text_data_for_df, columns=column_names)
- cot_vision_accuracy_df = pd.DataFrame(cot_vision_data_for_df, columns=column_names)
+ # cot_vision_accuracy_df = pd.DataFrame(cot_vision_data_for_df, columns=column_names)
  
  # Function to finalize DataFrame
  def finalize_df(df):
@@ -117,7 +121,7 @@ def finalize_df(df):
  accuracy_df = finalize_df(accuracy_df)
  vision_accuracy_df = finalize_df(vision_accuracy_df)
  cot_text_accuracy_df = finalize_df(cot_text_accuracy_df)
- cot_vision_accuracy_df = finalize_df(cot_vision_accuracy_df)
+ # cot_vision_accuracy_df = finalize_df(cot_vision_accuracy_df)
  
  
  def load_heatmap(evt: gr.SelectData):
@@ -176,6 +180,43 @@ def calculate_order_by_first_substring(selected_models):
  return text_only_filtered, number_of_queries, number_of_fsms
  
  
+
+ def calculate_order_by_first_substring_cot(selected_models):
+
+ first_columns = all_data[all_data["substring_index"] == 1]
+ query_ids_df = first_columns[first_columns["Model Type"] == "CoT Text Only"]
+ query_ids_df = query_ids_df[query_ids_df["Model Name"].isin(selected_models)]
+
+ query_ids_df = query_ids_df.groupby("query_id").filter(
+ lambda x: x["parsed_judge_response"].eq(1).all()
+ )
+
+ fsm_ids = query_ids_df.fsm_id.unique()
+
+ text_only = all_data[all_data["Model Type"] == "CoT Text Only"]
+ text_only_filtered = text_only[text_only["fsm_id"].isin(fsm_ids)]
+
+ query_ids = text_only_filtered.query_id.unique()
+ text_only_filtered = (
+ text_only_filtered.groupby(["Model Name"])["parsed_judge_response"]
+ .mean()
+ .reset_index()
+ )
+
+ text_only_filtered["Accuracy"] = text_only_filtered["parsed_judge_response"] * 100
+ text_only_filtered.drop("parsed_judge_response", axis=1, inplace=True)
+
+ text_only_filtered["Accuracy"] = text_only_filtered["Accuracy"].apply(
+ lambda x: round(x, 2)
+ )
+ text_only_filtered.sort_values("Accuracy", ascending=False, inplace=True)
+
+ number_of_queries = len(query_ids)
+ number_of_fsms = len(fsm_ids)
+
+ return text_only_filtered, number_of_queries, number_of_fsms
+
+
  with gr.Blocks() as demo:
  gr.Markdown("# FSM Benchmark Leaderboard")
  with gr.Tab("Text-only Benchmark"):
@@ -196,8 +237,8 @@ with gr.Blocks() as demo:
  fn=load_vision_heatmap, outputs=[heatmap_image_vision]
  )
  
- with gr.Tab("CoT Text-only Benchmark"):
- gr.Markdown("# CoT Text-only Leaderboard")
+ with gr.Tab("Text-only Benchmark (CoT)"):
+ gr.Markdown("# Text-only Leaderboard (CoT)")
  cot_leader_board_text = gr.Dataframe(
  cot_text_accuracy_df, headers=headers_with_icons
  )
@@ -207,16 +248,16 @@ with gr.Blocks() as demo:
  fn=load_cot_heatmap, outputs=[cot_heatmap_image_text]
  )
  
- with gr.Tab("CoT Vision Benchmark"):
- gr.Markdown("# CoT Vision Benchmark Leaderboard")
- cot_leader_board_vision = gr.Dataframe(
- cot_vision_accuracy_df, headers=headers_with_icons
- )
- gr.Markdown("## Heatmap")
- cot_heatmap_image_vision = gr.Image(label="", show_label=False)
- cot_leader_board_vision.select(
- fn=load_cot_vision_heatmap, outputs=[cot_heatmap_image_vision]
- )
+ # with gr.Tab("Vision Benchmark (CoT)"):
+ # gr.Markdown("# Vision Benchmark Leaderboard (CoT)")
+ # cot_leader_board_vision = gr.Dataframe(
+ # cot_vision_accuracy_df, headers=headers_with_icons
+ # )
+ # gr.Markdown("## Heatmap")
+ # cot_heatmap_image_vision = gr.Image(label="", show_label=False)
+ # cot_leader_board_vision.select(
+ # fn=load_cot_vision_heatmap, outputs=[cot_heatmap_image_vision]
+ # )
  
  with gr.Tab("Constraint Text-only Results"):
  gr.Markdown("## Constraint Text-only Leaderboard by first substring")
@@ -240,4 +281,26 @@ with gr.Blocks() as demo:
  queue=True,
  )
  
+
+ with gr.Tab("Constraint Text-only Results (CoT)"):
+ gr.Markdown("## Constraint Text-only Leaderboard by first substrin (CoT)")
+ included_models_cot = gr.CheckboxGroup(
+ label="Models to include",
+ choices=all_cot_text_only_models,
+ value=all_cot_text_only_models,
+ interactive=True,
+ )
+ with gr.Row():
+ number_of_queries_cot = gr.Textbox(label="Number of included queries")
+ number_of_fsms_cot = gr.Textbox(label="Number of included FSMs")
+
+ constrained_leader_board_text_cot = gr.Dataframe()
+
+ included_models_cot.select(
+ fn=calculate_order_by_first_substring_cot,
+ inputs=[included_models_cot],
+ outputs=[constrained_leader_board_text_cot, number_of_queries_cot, number_of_fsms_cot],
+ queue=True,
+ )
+
  demo.launch()
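
Note (editor's sketch, not part of the commit): the new calculate_order_by_first_substring_cot helper keeps only queries whose first-substring judgment is correct for every selected CoT model, then reports per-model accuracy over the surviving FSMs. Below is a minimal standalone sketch of that filtering logic, adapted to take the DataFrame as an argument; the toy frame and the name filter_cot_by_first_substring are illustrative only, while the app itself operates on the combined all_data frame.

import pandas as pd

# Toy frame with the columns the leaderboard code relies on (hypothetical data).
all_data = pd.DataFrame(
    {
        "Model Name": ["gemma-7b-it", "gemma-7b-it", "Qwen1.5-72B-Chat", "Qwen1.5-72B-Chat"],
        "Model Type": ["CoT Text Only"] * 4,
        "substring_index": [1, 1, 1, 1],
        "query_id": ["q1", "q2", "q1", "q2"],
        "fsm_id": ["f1", "f2", "f1", "f2"],
        "parsed_judge_response": [1, 0, 1, 1],
    }
)

def filter_cot_by_first_substring(all_data, selected_models):
    """Keep queries whose first-substring judgment is correct for every selected
    CoT model, then report per-model accuracy over the surviving FSMs."""
    first = all_data[
        (all_data["substring_index"] == 1)
        & (all_data["Model Type"] == "CoT Text Only")
        & (all_data["Model Name"].isin(selected_models))
    ]
    # A query survives only if all selected models judged it correct (response == 1).
    solved = first.groupby("query_id").filter(
        lambda g: g["parsed_judge_response"].eq(1).all()
    )
    fsm_ids = solved["fsm_id"].unique()

    cot = all_data[all_data["Model Type"] == "CoT Text Only"]
    kept = cot[cot["fsm_id"].isin(fsm_ids)]
    per_model = kept.groupby("Model Name")["parsed_judge_response"].mean().reset_index()
    per_model["Accuracy"] = (per_model["parsed_judge_response"] * 100).round(2)
    per_model = per_model.drop(columns="parsed_judge_response").sort_values(
        "Accuracy", ascending=False
    )
    return per_model, kept["query_id"].nunique(), len(fsm_ids)

# q2 is dropped because gemma-7b-it got it wrong; only the f1 queries remain.
print(filter_cot_by_first_substring(all_data, ["gemma-7b-it", "Qwen1.5-72B-Chat"]))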
results-cot/Mixtral-8x7B-Instruct-v0.1.csv ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:093e919d90609c3be8d6818cf56ca018214da3a42b78aeaf85f92581b72c5ad4
+ size 19494123
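
Note (editor's sketch, not part of the commit): the three added lines above, like the other "+3" result files in this commit, are Git LFS pointer stubs (version, oid, size) rather than the CSV data itself; the actual content lives in LFS storage. A minimal sketch of reading such a pointer when the file has not been smudged (e.g. cloned with GIT_LFS_SKIP_SMUDGE=1); the parse_lfs_pointer helper is hypothetical, not part of this repository.

from pathlib import Path

def parse_lfs_pointer(path):
    """Parse a Git LFS pointer file into its version, oid, and size fields."""
    fields = {}
    for line in Path(path).read_text().splitlines():
        key, _, value = line.strip().partition(" ")
        if key:
            fields[key] = value
    return {
        "version": fields.get("version"),
        "oid": fields.get("oid", "").split(":", 1)[-1],  # drop the "sha256:" prefix
        "size": int(fields.get("size", 0)),
    }

# e.g. parse_lfs_pointer("results-cot/Mixtral-8x7B-Instruct-v0.1.csv") would return
# {'version': 'https://git-lfs.github.com/spec/v1',
#  'oid': '093e919d90609c3be8d6818cf56ca018214da3a42b78aeaf85f92581b72c5ad4',
#  'size': 19494123}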
results-cot/{gpt-4v-CoT-Azure.csv → Mixtral-8x7B-Instruct-v0.1.jpg} RENAMED
File without changes
results-cot/Mixtral-8x7B-Instruct-v0.1.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:686692584c6ba027c454d699bbf585b95e5c99bfc426810ea74b327a975b9cf3
+ size 19489822
results-cot/{gpt-4v-CoT-Azure.jpg → Mixtral-8x7B-Instruct-v0.1.png} RENAMED
File without changes
results-cot/Qwen1.5-72B-Chat.csv ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:32681449776facf1084405001e69ed7926b79c69f9717fb159e3eb064b333636
+ size 15795431
results-cot/{gpt-4v-CoT-Azure.pkl → Qwen1.5-72B-Chat.jpg} RENAMED
File without changes
results-cot/Qwen1.5-72B-Chat.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1c20383298d4b6482ca7c30bf91822e24099dc67b71a3be10271005e25208c40
+ size 15778970
results-cot/{gpt-4v-CoT-Azure.png → Qwen1.5-72B-Chat.png} RENAMED
File without changes
results-cot/gemma-7b-it.csv ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f8535fa3f2ef5a94b1b552859930e0476ca0f3c77ec4c277893a9ab9ef45d6c3
+ size 16793758
results-cot/gemma-7b-it.jpg ADDED

Git LFS Details

  • SHA256: 28be12e5ad08179e972700c578cc8089b946407e17effa2e25fb2d5129894918
  • Pointer size: 132 Bytes
  • Size of remote file: 1.34 MB
results-cot/gemma-7b-it.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3c581027f8b78df5934117276cec3e53613f5ac953d045f71af4121b3ec2e1a4
+ size 16822239
results-cot/gemma-7b-it.png ADDED

Git LFS Details

  • SHA256: 5d10e044726def8fdebc8bd89b6cda148c315fd8d808dd7f168d4c5dbf92c2f2
  • Pointer size: 132 Bytes
  • Size of remote file: 1.01 MB
results-cot/{gpt-3.5-CoT.csv → gpt-3.5-turbo-0125.csv} RENAMED
File without changes
results-cot/{gpt-3.5-CoT.jpg → gpt-3.5-turbo-0125.jpg} RENAMED
File without changes
results-cot/{gpt-3.5-CoT.pkl → gpt-3.5-turbo-0125.pkl} RENAMED
File without changes
results-cot/{gpt-3.5-CoT.png → gpt-3.5-turbo-0125.png} RENAMED
File without changes
results-vision-CoT/gemini-pro-vision-CoT.csv DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:1ebebe1d6caee19a4f714bf13eaba72e7a0b5d15281c407cd4dc53a2820ad312
- size 6184119
results-vision-CoT/gemini-pro-vision-CoT.jpg DELETED

Git LFS Details

  • SHA256: fed7a1736c7550edca80305d90c975e36da47331bc67f824c23b6bb5525289b4
  • Pointer size: 132 Bytes
  • Size of remote file: 1.33 MB
results-vision-CoT/gemini-pro-vision-CoT.pkl DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:049d575dbad9da04496fea752e19f915bcec445b13f3010f9c67544012c936ff
- size 6144275
results-vision-CoT/gemini-pro-vision-CoT.png DELETED

Git LFS Details

  • SHA256: 49ab8af8d2e3d2fb671b375a830808eb92a84e0faef35d2844f8eed62bd6acf5
  • Pointer size: 132 Bytes
  • Size of remote file: 1.01 MB
results-vision/gemini-pro-vision-CoT.csv DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:1ebebe1d6caee19a4f714bf13eaba72e7a0b5d15281c407cd4dc53a2820ad312
- size 6184119
results-vision/gemini-pro-vision-CoT.jpg DELETED

Git LFS Details

  • SHA256: fed7a1736c7550edca80305d90c975e36da47331bc67f824c23b6bb5525289b4
  • Pointer size: 132 Bytes
  • Size of remote file: 1.33 MB
results-vision/gemini-pro-vision-CoT.pkl DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:049d575dbad9da04496fea752e19f915bcec445b13f3010f9c67544012c936ff
- size 6144275
results-vision/gemini-pro-vision-CoT.png DELETED

Git LFS Details

  • SHA256: 49ab8af8d2e3d2fb671b375a830808eb92a84e0faef35d2844f8eed62bd6acf5
  • Pointer size: 132 Bytes
  • Size of remote file: 1.01 MB
results-vision/gpt-4v-CoT.csv DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:04b4de1a7a4280354c89609d15282109ee60f8f58129960dc0edbb046b12a5c6
- size 6374181
results-vision/gpt-4v-CoT.jpg DELETED

Git LFS Details

  • SHA256: 6d63da74c747dc220638351069b927925aaa34e580e2c00e70dd29e0d2cefebb
  • Pointer size: 132 Bytes
  • Size of remote file: 1.33 MB
results-vision/gpt-4v-CoT.pkl DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:52ae5e417e011db84976acd51a024eae7ccea1e686b7f3f0e8158cd77be4f847
- size 6320889
results-vision/gpt-4v-CoT.png DELETED

Git LFS Details

  • SHA256: b8a96d76a726ab67813368f0a630576aee5cda6b5264c2edc65af93932fe4a32
  • Pointer size: 132 Bytes
  • Size of remote file: 1.01 MB
results/CodeLlama-70b-Instruct-hf.csv ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3726905a1656174f3c29edfced6f2eec63222f6be8965c0d970264901d8cfc75
+ size 16476347
results/{CodeLlama-70B.jpg → CodeLlama-70b-Instruct-hf.jpg} RENAMED
File without changes
results/{CodeLlama-70B.pkl → CodeLlama-70b-Instruct-hf.pkl} RENAMED
File without changes
results/{CodeLlama-70B.png → CodeLlama-70b-Instruct-hf.png} RENAMED
File without changes
results/Llama-2-70b-chat-hf.csv ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:42a31de917b05ed5405474a348d072426474a8fb2ce7ff462dbb121e25f4b6ad
+ size 20760268
results/Mistral-7B-Instruct-v0.2.csv ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:29ad4985661fc41e659a631fc74ba433cd08a571048f11436ccf87ff74f0db09
+ size 27242025
results/Mixtral-8x7B-Instruct-v0.1.csv ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a93e2b963a5ac8129b5284f3fd7987964ef96fa0e64194de704a3549c611de1f
+ size 17978176
results/{Mixtral-8x7B-Instruct-0.1.jpg → Mixtral-8x7B-Instruct-v0.1.jpg} RENAMED
File without changes
results/{Mixtral-8x7B-Instruct-0.1.pkl → Mixtral-8x7B-Instruct-v0.1.pkl} RENAMED
File without changes
results/{Mixtral-8x7B-Instruct-0.1.png → Mixtral-8x7B-Instruct-v0.1.png} RENAMED
File without changes
results/Qwen1.5-72B-Chat.csv ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0ba395c0b55330f689827527831e57e50ae9d824b6635b2bb569713afcf26d4b
+ size 14219193
results/StripedHyena-Nous-7B.csv ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f662367ea0d33a368aaa7a72cfeed41d2f3dc05be6289a6fe485a028c7cb98d5
+ size 29219512
results/Yi-34B-Chat.csv ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f7f09fb5f46ca144490bcb42ec89dd27f169680493501c211bf2bcfcd908da1c
+ size 20485423
results/claude-3-haiku-20240307.csv ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:45623535997485afdee5b0312f2b5fdcc26cf531fbb56b6c3af6e126dfbe7b0f
+ size 19570166
results/{Claude-3-Haiku.jpg → claude-3-haiku-20240307.jpg} RENAMED
File without changes
results/{Claude-3-Haiku.pkl → claude-3-haiku-20240307.pkl} RENAMED
File without changes
results/{Claude-3-Haiku.png → claude-3-haiku-20240307.png} RENAMED
File without changes
results/claude-3-opus-20240229.csv ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0d902999bcee4798b81644b2ff0ea78280dd46bc310909154c1ef089adf82789
+ size 20131397
results/{Claude-3-Opus.jpg → claude-3-opus-20240229.jpg} RENAMED
File without changes
results/{Claude-3-Opus.pkl → claude-3-opus-20240229.pkl} RENAMED
File without changes