bigscience
/

bloomz

@@ -81,6 +81,573 @@ widget:
   example_title: "es-en fable"
 - text: "Write a fable about wood elves living in a forest that is suddenly invaded by ogres. The fable is a masterpiece that has achieved praise worldwide and its moral is \"Violence is the last refuge of the incompetent\". Fable (in Hindi):"
   example_title: "hi-en fable"
 ---
 ![xmtf](https://github.com/bigscience-workshop/xmtf/blob/master/xmtf_banner.png?raw=true)

   example_title: "es-en fable"
 - text: "Write a fable about wood elves living in a forest that is suddenly invaded by ogres. The fable is a masterpiece that has achieved praise worldwide and its moral is \"Violence is the last refuge of the incompetent\". Fable (in Hindi):"
   example_title: "hi-en fable"
+model-index:
+- name: bloomz
+  results:
+  - task:
+      type: Coreference resolution
+    dataset:
+      type: winogrande
+      name: Winogrande XL
+      config: xl
+      split: validation
+      revision: a80f460359d1e9a67c006011c94de42a8759430c
+    metrics:
+    - type: Accuracy
+      value: 59.27
+  - task:
+      type: Coreference resolution
+    dataset:
+      type: Muennighoff/xwinograd
+      name: XWinograd
+      config: en
+      split: test
+      revision: 9dd5ea5505fad86b7bedad667955577815300cee
+    metrics:
+    - type: Accuracy
+      value: 69.08
+  - task:
+      type: Coreference resolution
+    dataset:
+      type: Muennighoff/xwinograd
+      name: XWinograd
+      config: fr
+      split: test
+      revision: 9dd5ea5505fad86b7bedad667955577815300cee
+    metrics:
+    - type: Accuracy
+      value: 68.67
+  - task:
+      type: Coreference resolution
+    dataset:
+      type: Muennighoff/xwinograd
+      name: XWinograd
+      config: jp
+      split: test
+      revision: 9dd5ea5505fad86b7bedad667955577815300cee
+    metrics:
+    - type: Accuracy
+      value: 59.65
+  - task:
+      type: Coreference resolution
+    dataset:
+      type: Muennighoff/xwinograd
+      name: XWinograd
+      config: pt
+      split: test
+      revision: 9dd5ea5505fad86b7bedad667955577815300cee
+    metrics:
+    - type: Accuracy
+      value: 64.26
+  - task:
+      type: Coreference resolution
+    dataset:
+      type: Muennighoff/xwinograd
+      name: XWinograd
+      config: ru
+      split: test
+      revision: 9dd5ea5505fad86b7bedad667955577815300cee
+    metrics:
+    - type: Accuracy
+      value: 60.95
+  - task:
+      type: Coreference resolution
+    dataset:
+      type: Muennighoff/xwinograd
+      name: XWinograd
+      config: zh
+      split: test
+      revision: 9dd5ea5505fad86b7bedad667955577815300cee
+    metrics:
+    - type: Accuracy
+      value: 70.24
+  - task:
+      type: Natural language inference
+    dataset:
+      type: anli
+      name: ANLI
+      config: r1
+      split: validation
+      revision: 9dbd830a06fea8b1c49d6e5ef2004a08d9f45094
+    metrics:
+    - type: Accuracy
+      value: 48.6
+  - task:
+      type: Natural language inference
+    dataset:
+      type: anli
+      name: ANLI
+      config: r2
+      split: validation
+      revision: 9dbd830a06fea8b1c49d6e5ef2004a08d9f45094
+    metrics:
+    - type: Accuracy
+      value: 44.1
+  - task:
+      type: Natural language inference
+    dataset:
+      type: anli
+      name: ANLI
+      config: r3
+      split: validation
+      revision: 9dbd830a06fea8b1c49d6e5ef2004a08d9f45094
+    metrics:
+    - type: Accuracy
+      value: 45.5
+  - task:
+      type: Natural language inference
+    dataset:
+      type: super_glue
+      name: SuperGLUE
+      config: cb
+      split: validation
+      revision: 9e12063561e7e6c79099feb6d5a493142584e9e2
+    metrics:
+    - type: Accuracy
+      value: 82.14
+  - task:
+      type: Natural language inference
+    dataset:
+      type: super_glue
+      name: SuperGLUE
+      config: rte
+      split: validation
+      revision: 9e12063561e7e6c79099feb6d5a493142584e9e2
+    metrics:
+    - type: Accuracy
+      value: 85.56
+  - task:
+      type: Natural language inference
+    dataset:
+      type: xnli
+      name: XNLI
+      config: ar
+      split: validation
+      revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
+    metrics:
+    - type: Accuracy
+      value: 60.68
+  - task:
+      type: Natural language inference
+    dataset:
+      type: xnli
+      name: XNLI
+      config: bg
+      split: validation
+      revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
+    metrics:
+    - type: Accuracy
+      value: 48.43
+  - task:
+      type: Natural language inference
+    dataset:
+      type: xnli
+      name: XNLI
+      config: de
+      split: validation
+      revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
+    metrics:
+    - type: Accuracy
+      value: 54.38
+  - task:
+      type: Natural language inference
+    dataset:
+      type: xnli
+      name: XNLI
+      config: el
+      split: validation
+      revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
+    metrics:
+    - type: Accuracy
+      value: 47.43
+  - task:
+      type: Natural language inference
+    dataset:
+      type: xnli
+      name: XNLI
+      config: en
+      split: validation
+      revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
+    metrics:
+    - type: Accuracy
+      value: 67.47
+  - task:
+      type: Natural language inference
+    dataset:
+      type: xnli
+      name: XNLI
+      config: es
+      split: validation
+      revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
+    metrics:
+    - type: Accuracy
+      value: 61.24
+  - task:
+      type: Natural language inference
+    dataset:
+      type: xnli
+      name: XNLI
+      config: fr
+      split: validation
+      revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
+    metrics:
+    - type: Accuracy
+      value: 61.37
+  - task:
+      type: Natural language inference
+    dataset:
+      type: xnli
+      name: XNLI
+      config: hi
+      split: validation
+      revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
+    metrics:
+    - type: Accuracy
+      value: 60.2
+  - task:
+      type: Natural language inference
+    dataset:
+      type: xnli
+      name: XNLI
+      config: ru
+      split: validation
+      revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
+    metrics:
+    - type: Accuracy
+      value: 54.02
+  - task:
+      type: Natural language inference
+    dataset:
+      type: xnli
+      name: XNLI
+      config: sw
+      split: validation
+      revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
+    metrics:
+    - type: Accuracy
+      value: 52.09
+  - task:
+      type: Natural language inference
+    dataset:
+      type: xnli
+      name: XNLI
+      config: th
+      split: validation
+      revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
+    metrics:
+    - type: Accuracy
+      value: 43.78
+  - task:
+      type: Natural language inference
+    dataset:
+      type: xnli
+      name: XNLI
+      config: tr
+      split: validation
+      revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
+    metrics:
+    - type: Accuracy
+      value: 45.7
+  - task:
+      type: Natural language inference
+    dataset:
+      type: xnli
+      name: XNLI
+      config: ur
+      split: validation
+      revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
+    metrics:
+    - type: Accuracy
+      value: 50.8
+  - task:
+      type: Natural language inference
+    dataset:
+      type: xnli
+      name: XNLI
+      config: vi
+      split: validation
+      revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
+    metrics:
+    - type: Accuracy
+      value: 61.0
+  - task:
+      type: Natural language inference
+    dataset:
+      type: xnli
+      name: XNLI
+      config: zh
+      split: validation
+      revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
+    metrics:
+    - type: Accuracy
+      value: 56.91
+  - task:
+      type: Program synthesis
+    dataset:
+      type: openai_humaneval
+      name: HumanEval
+      split: test
+      revision: e8dc562f5de170c54b5481011dd9f4fa04845771
+    metrics:
+    - type: Pass@1
+      value: 12.06
+    - type: Pass@10
+      value: 26.53
+    - type: Pass@100
+      value: 48.44
+  - task:
+      type: Sentence completion
+    dataset:
+      type: story_cloze
+      name: StoryCloze
+      config: "2016"
+      split: validation
+      revision: e724c6f8cdf7c7a2fb229d862226e15b023ee4db
+    metrics:
+    - type: Accuracy
+      value: 96.26
+  - task:
+      type: Sentence completion
+    dataset:
+      type: super_glue
+      name: SuperGLUE
+      config: copa
+      split: validation
+      revision: 9e12063561e7e6c79099feb6d5a493142584e9e2
+    metrics:
+    - type: Accuracy
+      value: 91.0
+  - task:
+      type: Sentence completion
+    dataset:
+      type: xcopa
+      name: XCOPA
+      config: et
+      split: validation
+      revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
+    metrics:
+    - type: Accuracy
+      value: 51.0
+  - task:
+      type: Sentence completion
+    dataset:
+      type: xcopa
+      name: XCOPA
+      config: ht
+      split: validation
+      revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
+    metrics:
+    - type: Accuracy
+      value: 58.0
+  - task:
+      type: Sentence completion
+    dataset:
+      type: xcopa
+      name: XCOPA
+      config: id
+      split: validation
+      revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
+    metrics:
+    - type: Accuracy
+      value: 86.0
+  - task:
+      type: Sentence completion
+    dataset:
+      type: xcopa
+      name: XCOPA
+      config: it
+      split: validation
+      revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
+    metrics:
+    - type: Accuracy
+      value: 74.0
+  - task:
+      type: Sentence completion
+    dataset:
+      type: xcopa
+      name: XCOPA
+      config: qu
+      split: validation
+      revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
+    metrics:
+    - type: Accuracy
+      value: 56.0
+  - task:
+      type: Sentence completion
+    dataset:
+      type: xcopa
+      name: XCOPA
+      config: sw
+      split: validation
+      revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
+    metrics:
+    - type: Accuracy
+      value: 64.0
+  - task:
+      type: Sentence completion
+    dataset:
+      type: xcopa
+      name: XCOPA
+      config: ta
+      split: validation
+      revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
+    metrics:
+    - type: Accuracy
+      value: 69.0
+  - task:
+      type: Sentence completion
+    dataset:
+      type: xcopa
+      name: XCOPA
+      config: th
+      split: validation
+      revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
+    metrics:
+    - type: Accuracy
+      value: 58.0
+  - task:
+      type: Sentence completion
+    dataset:
+      type: xcopa
+      name: XCOPA
+      config: tr
+      split: validation
+      revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
+    metrics:
+    - type: Accuracy
+      value: 57.0
+  - task:
+      type: Sentence completion
+    dataset:
+      type: xcopa
+      name: XCOPA
+      config: vi
+      split: validation
+      revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
+    metrics:
+    - type: Accuracy
+      value: 87.0
+  - task:
+      type: Sentence completion
+    dataset:
+      type: xcopa
+      name: XCOPA
+      config: zh
+      split: validation
+      revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
+    metrics:
+    - type: Accuracy
+      value: 90.0
+  - task:
+      type: Sentence completion
+    dataset:
+      type: Muennighoff/xstory_cloze
+      name: XStoryCloze
+      config: ar
+      split: validation
+      revision: 8bb76e594b68147f1a430e86829d07189622b90d
+    metrics:
+    - type: Accuracy
+      value: 92.79
+  - task:
+      type: Sentence completion
+    dataset:
+      type: Muennighoff/xstory_cloze
+      name: XStoryCloze
+      config: es
+      split: validation
+      revision: 8bb76e594b68147f1a430e86829d07189622b90d
+    metrics:
+    - type: Accuracy
+      value: 94.37
+  - task:
+      type: Sentence completion
+    dataset:
+      type: Muennighoff/xstory_cloze
+      name: XStoryCloze
+      config: eu
+      split: validation
+      revision: 8bb76e594b68147f1a430e86829d07189622b90d
+    metrics:
+    - type: Accuracy
+      value: 86.9
+  - task:
+      type: Sentence completion
+    dataset:
+      type: Muennighoff/xstory_cloze
+      name: XStoryCloze
+      config: hi
+      split: validation
+      revision: 8bb76e594b68147f1a430e86829d07189622b90d
+    metrics:
+    - type: Accuracy
+      value: 88.42
+  - task:
+      type: Sentence completion
+    dataset:
+      type: Muennighoff/xstory_cloze
+      name: XStoryCloze
+      config: id
+      split: validation
+      revision: 8bb76e594b68147f1a430e86829d07189622b90d
+    metrics:
+    - type: Accuracy
+      value: 92.12
+  - task:
+      type: Sentence completion
+    dataset:
+      type: Muennighoff/xstory_cloze
+      name: XStoryCloze
+      config: my
+      split: validation
+      revision: 8bb76e594b68147f1a430e86829d07189622b90d
+    metrics:
+    - type: Accuracy
+      value: 52.35
+  - task:
+      type: Sentence completion
+    dataset:
+      type: Muennighoff/xstory_cloze
+      name: XStoryCloze
+      config: ru
+      split: validation
+      revision: 8bb76e594b68147f1a430e86829d07189622b90d
+    metrics:
+    - type: Accuracy
+      value: 81.73
+  - task:
+      type: Sentence completion
+    dataset:
+      type: Muennighoff/xstory_cloze
+      name: XStoryCloze
+      config: sw
+      split: validation
+      revision: 8bb76e594b68147f1a430e86829d07189622b90d
+    metrics:
+    - type: Accuracy
+      value: 79.81
+  - task:
+      type: Sentence completion
+    dataset:
+      type: Muennighoff/xstory_cloze
+      name: XStoryCloze
+      config: te
+      split: validation
+      revision: 8bb76e594b68147f1a430e86829d07189622b90d
+    metrics:
+    - type: Accuracy
+      value: 81.2
+  - task:
+      type: Sentence completion
+    dataset:
+      type: Muennighoff/xstory_cloze
+      name: XStoryCloze
+      config: zh
+      split: validation
+      revision: 8bb76e594b68147f1a430e86829d07189622b90d
+    metrics:
+    - type: Accuracy
+      value: 93.12
 ---
 ![xmtf](https://github.com/bigscience-workshop/xmtf/blob/master/xmtf_banner.png?raw=true)