Upload 9 files

Files changed (9) hide show

benchmark_scores/result_jaqket_v2-0.2-0.2.json ADDED Viewed

+{
+    "results": {
+      "jaqket_v2-0.2-0.2": {
+        "exact_match": 80.2405498281787,
+        "f1": 84.5903685852139
+      }
+    },
+    "versions": {
+      "jaqket_v2-0.2-0.2": 0.2
+    },
+    "config": {
+      "model": "hf-causal-experimental",
+      "model_args": "pretrained=HachiML/youri-2x7b_dev,tokenizer=HachiML/youri-2x7b_dev,use_accelerate=True,dtype=auto",
+      "num_fewshot": [
+        1
+      ],
+      "batch_size": 2,
+      "device": "cuda",
+      "no_cache": false,
+      "limit": null,
+      "bootstrap_iters": 100000,
+      "description_dict": null
+    }
+}

benchmark_scores/result_jcola.json ADDED Viewed

+{
+  "results": {
+    "jcola": {
+      "balanced_acc": 0.5909437739064947,
+      "mcc": 0.14568482514613612,
+      "mcc_stderr": 0.03657851423629269,
+      "macro_f1": 0.5553402665190399
+    }
+  },
+  "versions": {
+    "jcola": 0.2
+  },
+  "config": {
+    "model": "hf-causal-experimental",
+    "model_args": "pretrained=HachiML/youri-2x7b_dev,tokenizer=HachiML/youri-2x7b_dev,use_accelerate=True,dtype=auto",
+    "num_fewshot": [
+      5
+    ],
+    "batch_size": 2,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": null
+  }
+}

benchmark_scores/result_jcommonsenseqa-1.1-0.2.1.json ADDED Viewed

+{
+    "results": {
+      "jcommonsenseqa-1.1-0.2.1": {
+        "acc": 0.9115281501340483,
+        "acc_stderr": 0.008493108942289001,
+        "acc_norm": 0.837354781054513,
+        "acc_norm_stderr": 0.011037087895481251
+      }
+    },
+    "versions": {
+      "jcommonsenseqa-1.1-0.2.1": 1.1
+    },
+    "config": {
+      "model": "hf-causal-experimental",
+      "model_args": "pretrained=HachiML/youri-2x7b_dev,tokenizer=HachiML/youri-2x7b_dev,use_accelerate=True,dtype=auto",
+      "num_fewshot": [
+        3
+      ],
+      "batch_size": 1,
+      "device": "cuda",
+      "no_cache": false,
+      "limit": null,
+      "bootstrap_iters": 100000,
+      "description_dict": null
+    }
+}

benchmark_scores/result_jnli-1.3-0.2.json ADDED Viewed

+{
+    "results": {
+      "jnli-1.3-0.2": {
+        "acc": 0.5866885784716516,
+        "acc_stderr": 0.00998323894065562,
+        "acc_norm": 0.5866885784716516,
+        "acc_norm_stderr": 0.00998323894065562,
+        "balanced_acc": 0.7103671626849409,
+        "mcc": 0.48037594993558813,
+        "mcc_stderr": 0.01202764597359472,
+        "macro_f1": 0.6010291975400543
+      }
+    },
+    "versions": {
+      "jnli-1.3-0.2": 1.3
+    },
+    "config": {
+      "model": "hf-causal-experimental",
+      "model_args": "pretrained=HachiML/youri-2x7b_dev,tokenizer=HachiML/youri-2x7b_dev,use_accelerate=True,dtype=auto",
+      "num_fewshot": [
+        3
+      ],
+      "batch_size": 8,
+      "device": "cuda",
+      "no_cache": false,
+      "limit": null,
+      "bootstrap_iters": 100000,
+      "description_dict": null
+    }
+}

benchmark_scores/result_jsquad-1.2-0.2.json ADDED Viewed

+{
+    "results": {
+      "jsquad-1.2-0.2": {
+        "exact_match": 80.07654209815398,
+        "f1": 91.30275393110973
+      }
+    },
+    "versions": {
+      "jsquad-1.2-0.2": 1.2
+    },
+    "config": {
+      "model": "hf-causal-experimental",
+      "model_args": "pretrained=HachiML/youri-2x7b_dev,tokenizer=HachiML/youri-2x7b_dev,use_accelerate=True,dtype=auto",
+      "num_fewshot": [
+        2
+      ],
+      "batch_size": 1,
+      "device": "cuda",
+      "no_cache": false,
+      "limit": null,
+      "bootstrap_iters": 100000,
+      "description_dict": null
+    }
+}

benchmark_scores/result_marc_ja-1.1-0.2.json ADDED Viewed

+{
+  "results": {
+    "marc_ja-1.1-0.2": {
+      "acc": 0.9755925008843297,
+      "acc_stderr": 0.0020523733894530468,
+      "acc_norm": 0.9755925008843297,
+      "acc_norm_stderr": 0.0020523733894530468,
+      "balanced_acc": 0.958966027376291,
+      "mcc": 0.9035669838271105,
+      "mcc_stderr": 0.008066342268937397,
+      "macro_f1": 0.9516669276635559
+    }
+  },
+  "versions": {
+    "marc_ja-1.1-0.2": 1.1
+  },
+  "config": {
+    "model": "hf-causal-experimental",
+    "model_args": "pretrained=HachiML/youri-2x7b_dev,tokenizer=HachiML/youri-2x7b_dev,use_accelerate=True,dtype=auto",
+    "num_fewshot": [
+      0
+    ],
+    "batch_size": 2,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": null
+  }
+}

benchmark_scores/result_mgsm.json ADDED Viewed

+{
+  "results": {
+    "mgsm": {
+      "acc": 0.248,
+      "acc_stderr": 0.027367497504863555
+    }
+  },
+  "versions": {
+    "mgsm": 1.0
+  },
+  "config": {
+    "model": "hf-causal-experimental",
+    "model_args": "pretrained=HachiML/youri-2x7b_dev,tokenizer=HachiML/youri-2x7b_dev,use_accelerate=True,dtype=auto",
+    "num_fewshot": [
+      5
+    ],
+    "batch_size": 2,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": null
+  }
+}

benchmark_scores/result_xlsum_ja.json ADDED Viewed

+{
+  "results": {
+    "xlsum_ja": {
+      "rouge2": 25.628917796629892
+    }
+  },
+  "versions": {
+    "xlsum_ja": 1.0
+  },
+  "config": {
+    "model": "hf-causal-experimental",
+    "model_args": "pretrained=HachiML/youri-2x7b_dev,tokenizer=HachiML/youri-2x7b_dev,use_accelerate=True,dtype=auto",
+    "num_fewshot": [
+      1
+    ],
+    "batch_size": 2,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": null
+  }
+}

benchmark_scores/result_xwinograd_ja.json ADDED Viewed

+{
+    "results": {
+      "xwinograd_ja": {
+        "acc": 0.8143899895724713,
+        "acc_stderr": 0.012561287517973916
+      }
+    },
+    "versions": {
+      "xwinograd_ja": 1.0
+    },
+    "config": {
+      "model": "hf-causal-experimental",
+      "model_args": "pretrained=HachiML/youri-2x7b_dev,tokenizer=HachiML/youri-2x7b_dev,use_accelerate=True,dtype=auto",
+      "num_fewshot": [
+        0
+      ],
+      "batch_size": 2,
+      "device": "cuda",
+      "no_cache": false,
+      "limit": null,
+      "bootstrap_iters": 100000,
+      "description_dict": null
+    }
+}