Spaces:
Sleeping
Sleeping
update
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +40 -0
- app.py +83 -20
- results-cot/Mixtral-8x7B-Instruct-v0.1.csv +3 -0
- results-cot/{gpt-4v-CoT-Azure.csv → Mixtral-8x7B-Instruct-v0.1.jpg} +2 -2
- results-cot/Mixtral-8x7B-Instruct-v0.1.pkl +3 -0
- results-cot/{gpt-4v-CoT-Azure.jpg → Mixtral-8x7B-Instruct-v0.1.png} +2 -2
- results-cot/Qwen1.5-72B-Chat.csv +3 -0
- results-cot/{gpt-4v-CoT-Azure.pkl → Qwen1.5-72B-Chat.jpg} +2 -2
- results-cot/Qwen1.5-72B-Chat.pkl +3 -0
- results-cot/{gpt-4v-CoT-Azure.png → Qwen1.5-72B-Chat.png} +2 -2
- results-cot/gemma-7b-it.csv +3 -0
- results-cot/gemma-7b-it.jpg +3 -0
- results-cot/gemma-7b-it.pkl +3 -0
- results-cot/gemma-7b-it.png +3 -0
- results-cot/{gpt-3.5-CoT.csv → gpt-3.5-turbo-0125.csv} +0 -0
- results-cot/{gpt-3.5-CoT.jpg → gpt-3.5-turbo-0125.jpg} +0 -0
- results-cot/{gpt-3.5-CoT.pkl → gpt-3.5-turbo-0125.pkl} +0 -0
- results-cot/{gpt-3.5-CoT.png → gpt-3.5-turbo-0125.png} +0 -0
- results-vision-CoT/gemini-pro-vision-CoT.csv +0 -3
- results-vision-CoT/gemini-pro-vision-CoT.jpg +0 -3
- results-vision-CoT/gemini-pro-vision-CoT.pkl +0 -3
- results-vision-CoT/gemini-pro-vision-CoT.png +0 -3
- results-vision/gemini-pro-vision-CoT.csv +0 -3
- results-vision/gemini-pro-vision-CoT.jpg +0 -3
- results-vision/gemini-pro-vision-CoT.pkl +0 -3
- results-vision/gemini-pro-vision-CoT.png +0 -3
- results-vision/gpt-4v-CoT.csv +0 -3
- results-vision/gpt-4v-CoT.jpg +0 -3
- results-vision/gpt-4v-CoT.pkl +0 -3
- results-vision/gpt-4v-CoT.png +0 -3
- results/CodeLlama-70b-Instruct-hf.csv +3 -0
- results/{CodeLlama-70B.jpg → CodeLlama-70b-Instruct-hf.jpg} +0 -0
- results/{CodeLlama-70B.pkl → CodeLlama-70b-Instruct-hf.pkl} +0 -0
- results/{CodeLlama-70B.png → CodeLlama-70b-Instruct-hf.png} +0 -0
- results/Llama-2-70b-chat-hf.csv +3 -0
- results/Mistral-7B-Instruct-v0.2.csv +3 -0
- results/Mixtral-8x7B-Instruct-v0.1.csv +3 -0
- results/{Mixtral-8x7B-Instruct-0.1.jpg → Mixtral-8x7B-Instruct-v0.1.jpg} +0 -0
- results/{Mixtral-8x7B-Instruct-0.1.pkl → Mixtral-8x7B-Instruct-v0.1.pkl} +0 -0
- results/{Mixtral-8x7B-Instruct-0.1.png → Mixtral-8x7B-Instruct-v0.1.png} +0 -0
- results/Qwen1.5-72B-Chat.csv +3 -0
- results/StripedHyena-Nous-7B.csv +3 -0
- results/Yi-34B-Chat.csv +3 -0
- results/claude-3-haiku-20240307.csv +3 -0
- results/{Claude-3-Haiku.jpg → claude-3-haiku-20240307.jpg} +0 -0
- results/{Claude-3-Haiku.pkl → claude-3-haiku-20240307.pkl} +0 -0
- results/{Claude-3-Haiku.png → claude-3-haiku-20240307.png} +0 -0
- results/claude-3-opus-20240229.csv +3 -0
- results/{Claude-3-Opus.jpg → claude-3-opus-20240229.jpg} +0 -0
- results/{Claude-3-Opus.pkl → claude-3-opus-20240229.pkl} +0 -0
.gitattributes
CHANGED
@@ -115,3 +115,43 @@ results-cot/gpt-4v-CoT-Azure.pkl filter=lfs diff=lfs merge=lfs -text
|
|
115 |
results-cot/gpt-4v-CoT-Azure.csv filter=lfs diff=lfs merge=lfs -text
|
116 |
results-vision-CoT/gemini-pro-vision-CoT.csv filter=lfs diff=lfs merge=lfs -text
|
117 |
results-cot/gpt-3.5-CoT.csv filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
results-cot/gpt-4v-CoT-Azure.csv filter=lfs diff=lfs merge=lfs -text
|
116 |
results-vision-CoT/gemini-pro-vision-CoT.csv filter=lfs diff=lfs merge=lfs -text
|
117 |
results-cot/gpt-3.5-CoT.csv filter=lfs diff=lfs merge=lfs -text
|
118 |
+
results/claude-3-haiku-20240307.csv filter=lfs diff=lfs merge=lfs -text
|
119 |
+
results/claude-3-opus-20240229.csv filter=lfs diff=lfs merge=lfs -text
|
120 |
+
results-cot/gemma-7b-it.csv filter=lfs diff=lfs merge=lfs -text
|
121 |
+
results-cot/gpt-3.5-turbo-0125.csv filter=lfs diff=lfs merge=lfs -text
|
122 |
+
results/gpt-3.5-turbo-0125.csv filter=lfs diff=lfs merge=lfs -text
|
123 |
+
results-cot/Mixtral-8x7B-Instruct-v0.1.csv filter=lfs diff=lfs merge=lfs -text
|
124 |
+
results/gemma-7b-it.csv filter=lfs diff=lfs merge=lfs -text
|
125 |
+
results-cot/Qwen1.5-72B-Chat.csv filter=lfs diff=lfs merge=lfs -text
|
126 |
+
results/CodeLlama-70b-Instruct-hf.csv filter=lfs diff=lfs merge=lfs -text
|
127 |
+
results/Mixtral-8x7B-Instruct-v0.1.csv filter=lfs diff=lfs merge=lfs -text
|
128 |
+
results-cot/gemma-7b-it.pkl filter=lfs diff=lfs merge=lfs -text
|
129 |
+
results/claude-3-haiku-20240307.pkl filter=lfs diff=lfs merge=lfs -text
|
130 |
+
results/gemma-7b-it.pkl filter=lfs diff=lfs merge=lfs -text
|
131 |
+
results-cot/gpt-3.5-turbo-0125.pkl filter=lfs diff=lfs merge=lfs -text
|
132 |
+
results-cot/Mixtral-8x7B-Instruct-v0.1.pkl filter=lfs diff=lfs merge=lfs -text
|
133 |
+
results/Mixtral-8x7B-Instruct-v0.1.pkl filter=lfs diff=lfs merge=lfs -text
|
134 |
+
results/claude-3-opus-20240229.pkl filter=lfs diff=lfs merge=lfs -text
|
135 |
+
results-cot/Qwen1.5-72B-Chat.pkl filter=lfs diff=lfs merge=lfs -text
|
136 |
+
results/CodeLlama-70b-Instruct-hf.pkl filter=lfs diff=lfs merge=lfs -text
|
137 |
+
results/gpt-3.5-turbo-0125.pkl filter=lfs diff=lfs merge=lfs -text
|
138 |
+
results/claude-3-haiku-20240307.jpg filter=lfs diff=lfs merge=lfs -text
|
139 |
+
results/claude-3-opus-20240229.jpg filter=lfs diff=lfs merge=lfs -text
|
140 |
+
results/gpt-3.5-turbo-0125.jpg filter=lfs diff=lfs merge=lfs -text
|
141 |
+
results-cot/gpt-3.5-turbo-0125.jpg filter=lfs diff=lfs merge=lfs -text
|
142 |
+
results/Mixtral-8x7B-Instruct-v0.1.jpg filter=lfs diff=lfs merge=lfs -text
|
143 |
+
results-cot/Qwen1.5-72B-Chat.jpg filter=lfs diff=lfs merge=lfs -text
|
144 |
+
results/gemma-7b-it.jpg filter=lfs diff=lfs merge=lfs -text
|
145 |
+
results-cot/Mixtral-8x7B-Instruct-v0.1.jpg filter=lfs diff=lfs merge=lfs -text
|
146 |
+
results-cot/gemma-7b-it.jpg filter=lfs diff=lfs merge=lfs -text
|
147 |
+
results/CodeLlama-70b-Instruct-hf.jpg filter=lfs diff=lfs merge=lfs -text
|
148 |
+
results-cot/gemma-7b-it.png filter=lfs diff=lfs merge=lfs -text
|
149 |
+
results-cot/gpt-3.5-turbo-0125.png filter=lfs diff=lfs merge=lfs -text
|
150 |
+
results/gpt-3.5-turbo-0125.png filter=lfs diff=lfs merge=lfs -text
|
151 |
+
results/CodeLlama-70b-Instruct-hf.png filter=lfs diff=lfs merge=lfs -text
|
152 |
+
results/Mixtral-8x7B-Instruct-v0.1.png filter=lfs diff=lfs merge=lfs -text
|
153 |
+
results/claude-3-opus-20240229.png filter=lfs diff=lfs merge=lfs -text
|
154 |
+
results-cot/Mixtral-8x7B-Instruct-v0.1.png filter=lfs diff=lfs merge=lfs -text
|
155 |
+
results-cot/Qwen1.5-72B-Chat.png filter=lfs diff=lfs merge=lfs -text
|
156 |
+
results/claude-3-haiku-20240307.png filter=lfs diff=lfs merge=lfs -text
|
157 |
+
results/gemma-7b-it.png filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
@@ -10,7 +10,7 @@ vision_results = glob("results-vision/*.pkl")
|
|
10 |
# Load CoT text benchmark results
|
11 |
cot_text_results = glob("results-cot/*.pkl")
|
12 |
# Load CoT vision benchmark results
|
13 |
-
cot_vision_results = glob("results-vision-CoT/*.pkl")
|
14 |
|
15 |
# Function to load data, add model type and name
|
16 |
def load_data(files, model_type):
|
@@ -27,18 +27,22 @@ def load_data(files, model_type):
|
|
27 |
data = load_data(csv_results, "Text Only")
|
28 |
vision_data = load_data(vision_results, "Vision")
|
29 |
cot_text_data = load_data(cot_text_results, "CoT Text Only")
|
30 |
-
cot_vision_data = load_data(cot_vision_results, "CoT Vision")
|
31 |
|
32 |
# Combine all data into a single DataFrame
|
33 |
all_data = pd.concat(
|
34 |
-
[data, vision_data, cot_text_data
|
35 |
)
|
36 |
|
37 |
all_model_names = all_data["Model Name"].unique()
|
38 |
all_text_only_model_names = list(
|
39 |
all_data[all_data["Model Type"] == "Text Only"]["Model Name"].unique()
|
40 |
)
|
41 |
-
|
|
|
|
|
|
|
|
|
42 |
|
43 |
## Continue with the cold code --
|
44 |
# TODO: Update me to read from all_data for later
|
@@ -50,7 +54,7 @@ vision_data = {file: pd.read_pickle(file) for file in vision_results}
|
|
50 |
# Load the CoT text files into a dict
|
51 |
cot_text_data = {file: pd.read_pickle(file) for file in cot_text_results}
|
52 |
# Load the CoT vision files into a dict
|
53 |
-
cot_vision_data = {file: pd.read_pickle(file) for file in cot_vision_results}
|
54 |
|
55 |
|
56 |
def calculate_accuracy(df):
|
@@ -96,13 +100,13 @@ def process_data(data):
|
|
96 |
text_data_for_df = process_data(data)
|
97 |
vision_data_for_df = process_data(vision_data)
|
98 |
cot_text_data_for_df = process_data(cot_text_data)
|
99 |
-
cot_vision_data_for_df = process_data(cot_vision_data)
|
100 |
|
101 |
# Create DataFrames
|
102 |
accuracy_df = pd.DataFrame(text_data_for_df, columns=column_names)
|
103 |
vision_accuracy_df = pd.DataFrame(vision_data_for_df, columns=column_names)
|
104 |
cot_text_accuracy_df = pd.DataFrame(cot_text_data_for_df, columns=column_names)
|
105 |
-
cot_vision_accuracy_df = pd.DataFrame(cot_vision_data_for_df, columns=column_names)
|
106 |
|
107 |
# Function to finalize DataFrame
|
108 |
def finalize_df(df):
|
@@ -117,7 +121,7 @@ def finalize_df(df):
|
|
117 |
accuracy_df = finalize_df(accuracy_df)
|
118 |
vision_accuracy_df = finalize_df(vision_accuracy_df)
|
119 |
cot_text_accuracy_df = finalize_df(cot_text_accuracy_df)
|
120 |
-
cot_vision_accuracy_df = finalize_df(cot_vision_accuracy_df)
|
121 |
|
122 |
|
123 |
def load_heatmap(evt: gr.SelectData):
|
@@ -176,6 +180,43 @@ def calculate_order_by_first_substring(selected_models):
|
|
176 |
return text_only_filtered, number_of_queries, number_of_fsms
|
177 |
|
178 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
with gr.Blocks() as demo:
|
180 |
gr.Markdown("# FSM Benchmark Leaderboard")
|
181 |
with gr.Tab("Text-only Benchmark"):
|
@@ -196,8 +237,8 @@ with gr.Blocks() as demo:
|
|
196 |
fn=load_vision_heatmap, outputs=[heatmap_image_vision]
|
197 |
)
|
198 |
|
199 |
-
with gr.Tab("
|
200 |
-
gr.Markdown("#
|
201 |
cot_leader_board_text = gr.Dataframe(
|
202 |
cot_text_accuracy_df, headers=headers_with_icons
|
203 |
)
|
@@ -207,16 +248,16 @@ with gr.Blocks() as demo:
|
|
207 |
fn=load_cot_heatmap, outputs=[cot_heatmap_image_text]
|
208 |
)
|
209 |
|
210 |
-
with gr.Tab("
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
|
221 |
with gr.Tab("Constraint Text-only Results"):
|
222 |
gr.Markdown("## Constraint Text-only Leaderboard by first substring")
|
@@ -240,4 +281,26 @@ with gr.Blocks() as demo:
|
|
240 |
queue=True,
|
241 |
)
|
242 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
243 |
demo.launch()
|
|
|
10 |
# Load CoT text benchmark results
|
11 |
cot_text_results = glob("results-cot/*.pkl")
|
12 |
# Load CoT vision benchmark results
|
13 |
+
# cot_vision_results = glob("results-vision-CoT/*.pkl")
|
14 |
|
15 |
# Function to load data, add model type and name
|
16 |
def load_data(files, model_type):
|
|
|
27 |
data = load_data(csv_results, "Text Only")
|
28 |
vision_data = load_data(vision_results, "Vision")
|
29 |
cot_text_data = load_data(cot_text_results, "CoT Text Only")
|
30 |
+
# cot_vision_data = load_data(cot_vision_results, "CoT Vision")
|
31 |
|
32 |
# Combine all data into a single DataFrame
|
33 |
all_data = pd.concat(
|
34 |
+
[data, vision_data, cot_text_data], ignore_index=True
|
35 |
)
|
36 |
|
37 |
all_model_names = all_data["Model Name"].unique()
|
38 |
all_text_only_model_names = list(
|
39 |
all_data[all_data["Model Type"] == "Text Only"]["Model Name"].unique()
|
40 |
)
|
41 |
+
all_cot_text_only_models = list(
|
42 |
+
all_data[all_data["Model Type"] == "CoT Text Only"]["Model Name"].unique()
|
43 |
+
)
|
44 |
+
|
45 |
+
|
46 |
|
47 |
## Continue with the cold code --
|
48 |
# TODO: Update me to read from all_data for later
|
|
|
54 |
# Load the CoT text files into a dict
|
55 |
cot_text_data = {file: pd.read_pickle(file) for file in cot_text_results}
|
56 |
# Load the CoT vision files into a dict
|
57 |
+
# cot_vision_data = {file: pd.read_pickle(file) for file in cot_vision_results}
|
58 |
|
59 |
|
60 |
def calculate_accuracy(df):
|
|
|
100 |
text_data_for_df = process_data(data)
|
101 |
vision_data_for_df = process_data(vision_data)
|
102 |
cot_text_data_for_df = process_data(cot_text_data)
|
103 |
+
# cot_vision_data_for_df = process_data(cot_vision_data)
|
104 |
|
105 |
# Create DataFrames
|
106 |
accuracy_df = pd.DataFrame(text_data_for_df, columns=column_names)
|
107 |
vision_accuracy_df = pd.DataFrame(vision_data_for_df, columns=column_names)
|
108 |
cot_text_accuracy_df = pd.DataFrame(cot_text_data_for_df, columns=column_names)
|
109 |
+
# cot_vision_accuracy_df = pd.DataFrame(cot_vision_data_for_df, columns=column_names)
|
110 |
|
111 |
# Function to finalize DataFrame
|
112 |
def finalize_df(df):
|
|
|
121 |
accuracy_df = finalize_df(accuracy_df)
|
122 |
vision_accuracy_df = finalize_df(vision_accuracy_df)
|
123 |
cot_text_accuracy_df = finalize_df(cot_text_accuracy_df)
|
124 |
+
# cot_vision_accuracy_df = finalize_df(cot_vision_accuracy_df)
|
125 |
|
126 |
|
127 |
def load_heatmap(evt: gr.SelectData):
|
|
|
180 |
return text_only_filtered, number_of_queries, number_of_fsms
|
181 |
|
182 |
|
183 |
+
|
184 |
+
def calculate_order_by_first_substring_cot(selected_models):
|
185 |
+
|
186 |
+
first_columns = all_data[all_data["substring_index"] == 1]
|
187 |
+
query_ids_df = first_columns[first_columns["Model Type"] == "CoT Text Only"]
|
188 |
+
query_ids_df = query_ids_df[query_ids_df["Model Name"].isin(selected_models)]
|
189 |
+
|
190 |
+
query_ids_df = query_ids_df.groupby("query_id").filter(
|
191 |
+
lambda x: x["parsed_judge_response"].eq(1).all()
|
192 |
+
)
|
193 |
+
|
194 |
+
fsm_ids = query_ids_df.fsm_id.unique()
|
195 |
+
|
196 |
+
text_only = all_data[all_data["Model Type"] == "CoT Text Only"]
|
197 |
+
text_only_filtered = text_only[text_only["fsm_id"].isin(fsm_ids)]
|
198 |
+
|
199 |
+
query_ids = text_only_filtered.query_id.unique()
|
200 |
+
text_only_filtered = (
|
201 |
+
text_only_filtered.groupby(["Model Name"])["parsed_judge_response"]
|
202 |
+
.mean()
|
203 |
+
.reset_index()
|
204 |
+
)
|
205 |
+
|
206 |
+
text_only_filtered["Accuracy"] = text_only_filtered["parsed_judge_response"] * 100
|
207 |
+
text_only_filtered.drop("parsed_judge_response", axis=1, inplace=True)
|
208 |
+
|
209 |
+
text_only_filtered["Accuracy"] = text_only_filtered["Accuracy"].apply(
|
210 |
+
lambda x: round(x, 2)
|
211 |
+
)
|
212 |
+
text_only_filtered.sort_values("Accuracy", ascending=False, inplace=True)
|
213 |
+
|
214 |
+
number_of_queries = len(query_ids)
|
215 |
+
number_of_fsms = len(fsm_ids)
|
216 |
+
|
217 |
+
return text_only_filtered, number_of_queries, number_of_fsms
|
218 |
+
|
219 |
+
|
220 |
with gr.Blocks() as demo:
|
221 |
gr.Markdown("# FSM Benchmark Leaderboard")
|
222 |
with gr.Tab("Text-only Benchmark"):
|
|
|
237 |
fn=load_vision_heatmap, outputs=[heatmap_image_vision]
|
238 |
)
|
239 |
|
240 |
+
with gr.Tab("Text-only Benchmark (CoT)"):
|
241 |
+
gr.Markdown("# Text-only Leaderboard (CoT)")
|
242 |
cot_leader_board_text = gr.Dataframe(
|
243 |
cot_text_accuracy_df, headers=headers_with_icons
|
244 |
)
|
|
|
248 |
fn=load_cot_heatmap, outputs=[cot_heatmap_image_text]
|
249 |
)
|
250 |
|
251 |
+
# with gr.Tab("Vision Benchmark (CoT)"):
|
252 |
+
# gr.Markdown("# Vision Benchmark Leaderboard (CoT)")
|
253 |
+
# cot_leader_board_vision = gr.Dataframe(
|
254 |
+
# cot_vision_accuracy_df, headers=headers_with_icons
|
255 |
+
# )
|
256 |
+
# gr.Markdown("## Heatmap")
|
257 |
+
# cot_heatmap_image_vision = gr.Image(label="", show_label=False)
|
258 |
+
# cot_leader_board_vision.select(
|
259 |
+
# fn=load_cot_vision_heatmap, outputs=[cot_heatmap_image_vision]
|
260 |
+
# )
|
261 |
|
262 |
with gr.Tab("Constraint Text-only Results"):
|
263 |
gr.Markdown("## Constraint Text-only Leaderboard by first substring")
|
|
|
281 |
queue=True,
|
282 |
)
|
283 |
|
284 |
+
|
285 |
+
with gr.Tab("Constraint Text-only Results (CoT)"):
|
286 |
+
gr.Markdown("## Constraint Text-only Leaderboard by first substrin (CoT)")
|
287 |
+
included_models_cot = gr.CheckboxGroup(
|
288 |
+
label="Models to include",
|
289 |
+
choices=all_cot_text_only_models,
|
290 |
+
value=all_cot_text_only_models,
|
291 |
+
interactive=True,
|
292 |
+
)
|
293 |
+
with gr.Row():
|
294 |
+
number_of_queries_cot = gr.Textbox(label="Number of included queries")
|
295 |
+
number_of_fsms_cot = gr.Textbox(label="Number of included FSMs")
|
296 |
+
|
297 |
+
constrained_leader_board_text_cot = gr.Dataframe()
|
298 |
+
|
299 |
+
included_models_cot.select(
|
300 |
+
fn=calculate_order_by_first_substring_cot,
|
301 |
+
inputs=[included_models_cot],
|
302 |
+
outputs=[constrained_leader_board_text_cot, number_of_queries_cot, number_of_fsms_cot],
|
303 |
+
queue=True,
|
304 |
+
)
|
305 |
+
|
306 |
demo.launch()
|
results-cot/Mixtral-8x7B-Instruct-v0.1.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:093e919d90609c3be8d6818cf56ca018214da3a42b78aeaf85f92581b72c5ad4
|
3 |
+
size 19494123
|
results-cot/{gpt-4v-CoT-Azure.csv → Mixtral-8x7B-Instruct-v0.1.jpg}
RENAMED
File without changes
|
results-cot/Mixtral-8x7B-Instruct-v0.1.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:686692584c6ba027c454d699bbf585b95e5c99bfc426810ea74b327a975b9cf3
|
3 |
+
size 19489822
|
results-cot/{gpt-4v-CoT-Azure.jpg → Mixtral-8x7B-Instruct-v0.1.png}
RENAMED
File without changes
|
results-cot/Qwen1.5-72B-Chat.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:32681449776facf1084405001e69ed7926b79c69f9717fb159e3eb064b333636
|
3 |
+
size 15795431
|
results-cot/{gpt-4v-CoT-Azure.pkl → Qwen1.5-72B-Chat.jpg}
RENAMED
File without changes
|
results-cot/Qwen1.5-72B-Chat.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c20383298d4b6482ca7c30bf91822e24099dc67b71a3be10271005e25208c40
|
3 |
+
size 15778970
|
results-cot/{gpt-4v-CoT-Azure.png → Qwen1.5-72B-Chat.png}
RENAMED
File without changes
|
results-cot/gemma-7b-it.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8535fa3f2ef5a94b1b552859930e0476ca0f3c77ec4c277893a9ab9ef45d6c3
|
3 |
+
size 16793758
|
results-cot/gemma-7b-it.jpg
ADDED
Git LFS Details
|
results-cot/gemma-7b-it.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c581027f8b78df5934117276cec3e53613f5ac953d045f71af4121b3ec2e1a4
|
3 |
+
size 16822239
|
results-cot/gemma-7b-it.png
ADDED
Git LFS Details
|
results-cot/{gpt-3.5-CoT.csv → gpt-3.5-turbo-0125.csv}
RENAMED
File without changes
|
results-cot/{gpt-3.5-CoT.jpg → gpt-3.5-turbo-0125.jpg}
RENAMED
File without changes
|
results-cot/{gpt-3.5-CoT.pkl → gpt-3.5-turbo-0125.pkl}
RENAMED
File without changes
|
results-cot/{gpt-3.5-CoT.png → gpt-3.5-turbo-0125.png}
RENAMED
File without changes
|
results-vision-CoT/gemini-pro-vision-CoT.csv
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:1ebebe1d6caee19a4f714bf13eaba72e7a0b5d15281c407cd4dc53a2820ad312
|
3 |
-
size 6184119
|
|
|
|
|
|
|
|
results-vision-CoT/gemini-pro-vision-CoT.jpg
DELETED
Git LFS Details
|
results-vision-CoT/gemini-pro-vision-CoT.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:049d575dbad9da04496fea752e19f915bcec445b13f3010f9c67544012c936ff
|
3 |
-
size 6144275
|
|
|
|
|
|
|
|
results-vision-CoT/gemini-pro-vision-CoT.png
DELETED
Git LFS Details
|
results-vision/gemini-pro-vision-CoT.csv
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:1ebebe1d6caee19a4f714bf13eaba72e7a0b5d15281c407cd4dc53a2820ad312
|
3 |
-
size 6184119
|
|
|
|
|
|
|
|
results-vision/gemini-pro-vision-CoT.jpg
DELETED
Git LFS Details
|
results-vision/gemini-pro-vision-CoT.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:049d575dbad9da04496fea752e19f915bcec445b13f3010f9c67544012c936ff
|
3 |
-
size 6144275
|
|
|
|
|
|
|
|
results-vision/gemini-pro-vision-CoT.png
DELETED
Git LFS Details
|
results-vision/gpt-4v-CoT.csv
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:04b4de1a7a4280354c89609d15282109ee60f8f58129960dc0edbb046b12a5c6
|
3 |
-
size 6374181
|
|
|
|
|
|
|
|
results-vision/gpt-4v-CoT.jpg
DELETED
Git LFS Details
|
results-vision/gpt-4v-CoT.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:52ae5e417e011db84976acd51a024eae7ccea1e686b7f3f0e8158cd77be4f847
|
3 |
-
size 6320889
|
|
|
|
|
|
|
|
results-vision/gpt-4v-CoT.png
DELETED
Git LFS Details
|
results/CodeLlama-70b-Instruct-hf.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3726905a1656174f3c29edfced6f2eec63222f6be8965c0d970264901d8cfc75
|
3 |
+
size 16476347
|
results/{CodeLlama-70B.jpg → CodeLlama-70b-Instruct-hf.jpg}
RENAMED
File without changes
|
results/{CodeLlama-70B.pkl → CodeLlama-70b-Instruct-hf.pkl}
RENAMED
File without changes
|
results/{CodeLlama-70B.png → CodeLlama-70b-Instruct-hf.png}
RENAMED
File without changes
|
results/Llama-2-70b-chat-hf.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:42a31de917b05ed5405474a348d072426474a8fb2ce7ff462dbb121e25f4b6ad
|
3 |
+
size 20760268
|
results/Mistral-7B-Instruct-v0.2.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:29ad4985661fc41e659a631fc74ba433cd08a571048f11436ccf87ff74f0db09
|
3 |
+
size 27242025
|
results/Mixtral-8x7B-Instruct-v0.1.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a93e2b963a5ac8129b5284f3fd7987964ef96fa0e64194de704a3549c611de1f
|
3 |
+
size 17978176
|
results/{Mixtral-8x7B-Instruct-0.1.jpg → Mixtral-8x7B-Instruct-v0.1.jpg}
RENAMED
File without changes
|
results/{Mixtral-8x7B-Instruct-0.1.pkl → Mixtral-8x7B-Instruct-v0.1.pkl}
RENAMED
File without changes
|
results/{Mixtral-8x7B-Instruct-0.1.png → Mixtral-8x7B-Instruct-v0.1.png}
RENAMED
File without changes
|
results/Qwen1.5-72B-Chat.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ba395c0b55330f689827527831e57e50ae9d824b6635b2bb569713afcf26d4b
|
3 |
+
size 14219193
|
results/StripedHyena-Nous-7B.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f662367ea0d33a368aaa7a72cfeed41d2f3dc05be6289a6fe485a028c7cb98d5
|
3 |
+
size 29219512
|
results/Yi-34B-Chat.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f7f09fb5f46ca144490bcb42ec89dd27f169680493501c211bf2bcfcd908da1c
|
3 |
+
size 20485423
|
results/claude-3-haiku-20240307.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45623535997485afdee5b0312f2b5fdcc26cf531fbb56b6c3af6e126dfbe7b0f
|
3 |
+
size 19570166
|
results/{Claude-3-Haiku.jpg → claude-3-haiku-20240307.jpg}
RENAMED
File without changes
|
results/{Claude-3-Haiku.pkl → claude-3-haiku-20240307.pkl}
RENAMED
File without changes
|
results/{Claude-3-Haiku.png → claude-3-haiku-20240307.png}
RENAMED
File without changes
|
results/claude-3-opus-20240229.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d902999bcee4798b81644b2ff0ea78280dd46bc310909154c1ef089adf82789
|
3 |
+
size 20131397
|
results/{Claude-3-Opus.jpg → claude-3-opus-20240229.jpg}
RENAMED
File without changes
|
results/{Claude-3-Opus.pkl → claude-3-opus-20240229.pkl}
RENAMED
File without changes
|