Upload eval_results/HuggingFaceH4/mistral-7b-ift/v48.10/eval_gsm8k.json with huggingface_hub
Browse files
    	
        eval_results/HuggingFaceH4/mistral-7b-ift/v48.10/eval_gsm8k.json
    ADDED
    
    | @@ -0,0 +1,88 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "results": {
         | 
| 3 | 
            +
                "gsm8k": {
         | 
| 4 | 
            +
                  "exact_match,get-answer": 0.11675511751326763,
         | 
| 5 | 
            +
                  "exact_match_stderr,get-answer": 0.008845468136919105,
         | 
| 6 | 
            +
                  "alias": "gsm8k"
         | 
| 7 | 
            +
                }
         | 
| 8 | 
            +
              },
         | 
| 9 | 
            +
              "configs": {
         | 
| 10 | 
            +
                "gsm8k": {
         | 
| 11 | 
            +
                  "task": "gsm8k",
         | 
| 12 | 
            +
                  "group": [
         | 
| 13 | 
            +
                    "math_word_problems"
         | 
| 14 | 
            +
                  ],
         | 
| 15 | 
            +
                  "dataset_path": "gsm8k",
         | 
| 16 | 
            +
                  "dataset_name": "main",
         | 
| 17 | 
            +
                  "training_split": "train",
         | 
| 18 | 
            +
                  "test_split": "test",
         | 
| 19 | 
            +
                  "fewshot_split": "train",
         | 
| 20 | 
            +
                  "doc_to_text": "Question: {{question}}\nAnswer:",
         | 
| 21 | 
            +
                  "doc_to_target": "{{answer}}",
         | 
| 22 | 
            +
                  "description": "",
         | 
| 23 | 
            +
                  "target_delimiter": " ",
         | 
| 24 | 
            +
                  "fewshot_delimiter": "\n\n",
         | 
| 25 | 
            +
                  "num_fewshot": 5,
         | 
| 26 | 
            +
                  "metric_list": [
         | 
| 27 | 
            +
                    {
         | 
| 28 | 
            +
                      "metric": "exact_match",
         | 
| 29 | 
            +
                      "aggregation": "mean",
         | 
| 30 | 
            +
                      "higher_is_better": true,
         | 
| 31 | 
            +
                      "ignore_case": true,
         | 
| 32 | 
            +
                      "ignore_punctuation": false,
         | 
| 33 | 
            +
                      "regexes_to_ignore": [
         | 
| 34 | 
            +
                        ",",
         | 
| 35 | 
            +
                        "\\$",
         | 
| 36 | 
            +
                        "(?s).*#### "
         | 
| 37 | 
            +
                      ]
         | 
| 38 | 
            +
                    }
         | 
| 39 | 
            +
                  ],
         | 
| 40 | 
            +
                  "output_type": "generate_until",
         | 
| 41 | 
            +
                  "generation_kwargs": {
         | 
| 42 | 
            +
                    "until": [
         | 
| 43 | 
            +
                      "\n\n",
         | 
| 44 | 
            +
                      "Question:"
         | 
| 45 | 
            +
                    ],
         | 
| 46 | 
            +
                    "do_sample": false,
         | 
| 47 | 
            +
                    "temperature": 0.0
         | 
| 48 | 
            +
                  },
         | 
| 49 | 
            +
                  "repeats": 1,
         | 
| 50 | 
            +
                  "filter_list": [
         | 
| 51 | 
            +
                    {
         | 
| 52 | 
            +
                      "name": "get-answer",
         | 
| 53 | 
            +
                      "filter": [
         | 
| 54 | 
            +
                        {
         | 
| 55 | 
            +
                          "function": "regex",
         | 
| 56 | 
            +
                          "regex_pattern": "#### (\\-?[0-9\\.\\,]+)"
         | 
| 57 | 
            +
                        },
         | 
| 58 | 
            +
                        {
         | 
| 59 | 
            +
                          "function": "take_first"
         | 
| 60 | 
            +
                        }
         | 
| 61 | 
            +
                      ]
         | 
| 62 | 
            +
                    }
         | 
| 63 | 
            +
                  ],
         | 
| 64 | 
            +
                  "should_decontaminate": false,
         | 
| 65 | 
            +
                  "metadata": {
         | 
| 66 | 
            +
                    "version": 2.0
         | 
| 67 | 
            +
                  }
         | 
| 68 | 
            +
                }
         | 
| 69 | 
            +
              },
         | 
| 70 | 
            +
              "versions": {
         | 
| 71 | 
            +
                "gsm8k": 2.0
         | 
| 72 | 
            +
              },
         | 
| 73 | 
            +
              "n-shot": {
         | 
| 74 | 
            +
                "gsm8k": 5
         | 
| 75 | 
            +
              },
         | 
| 76 | 
            +
              "config": {
         | 
| 77 | 
            +
                "model": "hf",
         | 
| 78 | 
            +
                "model_args": "pretrained=HuggingFaceH4/mistral-7b-ift,revision=v48.10,dtype=bfloat16",
         | 
| 79 | 
            +
                "batch_size": "auto",
         | 
| 80 | 
            +
                "batch_sizes": [],
         | 
| 81 | 
            +
                "device": null,
         | 
| 82 | 
            +
                "use_cache": null,
         | 
| 83 | 
            +
                "limit": null,
         | 
| 84 | 
            +
                "bootstrap_iters": 100000,
         | 
| 85 | 
            +
                "gen_kwargs": null
         | 
| 86 | 
            +
              },
         | 
| 87 | 
            +
              "git_hash": "8a6546c"
         | 
| 88 | 
            +
            }
         | 
 
			
