Muennighoff
committed on
Commit
•
6fdddf4
1
Parent(s):
51b63aa
Add files
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +5 -0
- 2b855b11boscar/transformers/config.json +1 -0
- 2b855b11boscar/transformers/pytorch_model.bin +3 -0
- 2b855b14boscar/transformers/config.json +1 -0
- 2b855b14boscar/transformers/pytorch_model.bin +3 -0
- 2b855b18boscar/transformers/config.json +1 -0
- 2b855b18boscar/transformers/pytorch_model.bin +3 -0
- 2b855b28boscar/transformers/config.json +1 -0
- 2b855b28boscar/transformers/pytorch_model.bin +3 -0
- 2b855b9boscar/transformers/config.json +1 -0
- 2b855b9boscar/transformers/pytorch_model.bin +3 -0
- evaluation/2b855b11boscar/agg.limited=3000.model=2b855b11boscar.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:15.json +1 -0
- evaluation/2b855b11boscar/agg.limited=3000.model=2b855b11boscar.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:16.json +1 -0
- evaluation/2b855b11boscar/agg.limited=3000.model=2b855b11boscar.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:16.json +1 -0
- evaluation/2b855b11boscar/agg.limited=3000.model=2b855b11boscar.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:16.json +1 -0
- evaluation/2b855b11boscar/examples.limited=3000.model=2b855b11boscar.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:15.jsonl +0 -0
- evaluation/2b855b11boscar/examples.limited=3000.model=2b855b11boscar.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:16.jsonl +3 -0
- evaluation/2b855b11boscar/examples.limited=3000.model=2b855b11boscar.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:16.jsonl +0 -0
- evaluation/2b855b11boscar/examples.limited=3000.model=2b855b11boscar.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:16.jsonl +0 -0
- evaluation/2b855b11boscar/merged.csv +5 -0
- evaluation/2b855b11boscar/merged.json +1 -0
- evaluation/2b855b11boscar/slim.limited=3000.model=2b855b11boscar.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:15.json +133 -0
- evaluation/2b855b11boscar/slim.limited=3000.model=2b855b11boscar.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:16.json +133 -0
- evaluation/2b855b11boscar/slim.limited=3000.model=2b855b11boscar.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:16.json +133 -0
- evaluation/2b855b11boscar/slim.limited=3000.model=2b855b11boscar.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:16.json +133 -0
- evaluation/2b855b14boscar/agg.limited=3000.model=2b855b14boscar.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:09.json +1 -0
- evaluation/2b855b14boscar/agg.limited=3000.model=2b855b14boscar.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:09.json +1 -0
- evaluation/2b855b14boscar/agg.limited=3000.model=2b855b14boscar.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:09.json +1 -0
- evaluation/2b855b14boscar/agg.limited=3000.model=2b855b14boscar.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:09.json +1 -0
- evaluation/2b855b14boscar/examples.limited=3000.model=2b855b14boscar.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:09.jsonl +0 -0
- evaluation/2b855b14boscar/examples.limited=3000.model=2b855b14boscar.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:09.jsonl +3 -0
- evaluation/2b855b14boscar/examples.limited=3000.model=2b855b14boscar.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:09.jsonl +0 -0
- evaluation/2b855b14boscar/examples.limited=3000.model=2b855b14boscar.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:09.jsonl +0 -0
- evaluation/2b855b14boscar/merged.csv +5 -0
- evaluation/2b855b14boscar/merged.json +1 -0
- evaluation/2b855b14boscar/slim.limited=3000.model=2b855b14boscar.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:09.json +133 -0
- evaluation/2b855b14boscar/slim.limited=3000.model=2b855b14boscar.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:09.json +133 -0
- evaluation/2b855b14boscar/slim.limited=3000.model=2b855b14boscar.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:09.json +133 -0
- evaluation/2b855b14boscar/slim.limited=3000.model=2b855b14boscar.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:09.json +133 -0
- evaluation/2b855b18boscar/agg.limited=3000.model=2b855b18boscar.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:02.json +1 -0
- evaluation/2b855b18boscar/agg.limited=3000.model=2b855b18boscar.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:02.json +1 -0
- evaluation/2b855b18boscar/agg.limited=3000.model=2b855b18boscar.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:02.json +1 -0
- evaluation/2b855b18boscar/agg.limited=3000.model=2b855b18boscar.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:02.json +1 -0
- evaluation/2b855b18boscar/examples.limited=3000.model=2b855b18boscar.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:02.jsonl +0 -0
- evaluation/2b855b18boscar/examples.limited=3000.model=2b855b18boscar.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:02.jsonl +3 -0
- evaluation/2b855b18boscar/examples.limited=3000.model=2b855b18boscar.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:02.jsonl +0 -0
- evaluation/2b855b18boscar/examples.limited=3000.model=2b855b18boscar.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:02.jsonl +0 -0
- evaluation/2b855b18boscar/merged.csv +5 -0
- evaluation/2b855b18boscar/merged.json +1 -0
- evaluation/2b855b18boscar/slim.limited=3000.model=2b855b18boscar.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:02.json +133 -0
.gitattributes
CHANGED
@@ -32,3 +32,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
+
evaluation/2b855b11boscar/examples.limited=3000.model=2b855b11boscar.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:16.jsonl filter=lfs diff=lfs merge=lfs -text
|
36 |
+
evaluation/2b855b14boscar/examples.limited=3000.model=2b855b14boscar.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:09.jsonl filter=lfs diff=lfs merge=lfs -text
|
37 |
+
evaluation/2b855b18boscar/examples.limited=3000.model=2b855b18boscar.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:02.jsonl filter=lfs diff=lfs merge=lfs -text
|
38 |
+
evaluation/2b855b28boscar/examples.limited=3000.model=2b855b28boscar.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:00.jsonl filter=lfs diff=lfs merge=lfs -text
|
39 |
+
evaluation/2b855b9boscar/examples.limited=3000.model=2b855b9boscar.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:07.jsonl filter=lfs diff=lfs merge=lfs -text
|
2b855b11boscar/transformers/config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"vocab_size": 50304, "n_positions": 2048, "n_embd": 2560, "n_layer": 34, "n_head": 20, "n_inner": 10240, "activation_function": "gelu", "resid_pdrop": 0.1, "embd_pdrop": 0.1, "attn_pdrop": 0.1, "layer_norm_epsilon": 1e-05, "initializer_range": 0.02, "summary_type": "cls_index", "summary_use_proj": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "scale_attn_weights": true, "use_cache": true, "scale_attn_by_inverse_layer_idx": false, "reorder_and_upcast_attn": false, "bos_token_id": 50256, "eos_token_id": 50256, "return_dict": true, "output_hidden_states": false, "output_attentions": false, "torchscript": false, "torch_dtype": null, "use_bfloat16": false, "tf_legacy_loss": false, "pruned_heads": {}, "tie_word_embeddings": true, "is_encoder_decoder": false, "is_decoder": false, "cross_attention_hidden_size": null, "add_cross_attention": false, "tie_encoder_decoder": false, "max_length": 20, "min_length": 0, "do_sample": false, "early_stopping": false, "num_beams": 1, "num_beam_groups": 1, "diversity_penalty": 0.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "typical_p": 1.0, "repetition_penalty": 1.0, "length_penalty": 1.0, "no_repeat_ngram_size": 0, "encoder_no_repeat_ngram_size": 0, "bad_words_ids": null, "num_return_sequences": 1, "chunk_size_feed_forward": 0, "output_scores": false, "return_dict_in_generate": false, "forced_bos_token_id": null, "forced_eos_token_id": null, "remove_invalid_values": false, "exponential_decay_length_penalty": null, "suppress_tokens": null, "begin_suppress_tokens": null, "architectures": ["GPT2LMHeadModel"], "finetuning_task": null, "id2label": {"0": "LABEL_0", "1": "LABEL_1"}, "label2id": {"LABEL_0": 0, "LABEL_1": 1}, "tokenizer_class": null, "prefix": null, "pad_token_id": null, "sep_token_id": null, "decoder_start_token_id": null, "task_specific_params": null, "problem_type": null, "_name_or_path": "", "transformers_version": "4.25.0.dev0", "n_ctx": 1024, "gradient_checkpointing": 
false, "model_type": "gpt2"}
|
2b855b11boscar/transformers/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f18ecfd41684914e5e32992489524d81cf3dc807bc2f8e99dd2475561067d25
|
3 |
+
size 5903417421
|
2b855b14boscar/transformers/config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"vocab_size": 50304, "n_positions": 2048, "n_embd": 2560, "n_layer": 34, "n_head": 20, "n_inner": 10240, "activation_function": "gelu", "resid_pdrop": 0.1, "embd_pdrop": 0.1, "attn_pdrop": 0.1, "layer_norm_epsilon": 1e-05, "initializer_range": 0.02, "summary_type": "cls_index", "summary_use_proj": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "scale_attn_weights": true, "use_cache": true, "scale_attn_by_inverse_layer_idx": false, "reorder_and_upcast_attn": false, "bos_token_id": 50256, "eos_token_id": 50256, "return_dict": true, "output_hidden_states": false, "output_attentions": false, "torchscript": false, "torch_dtype": null, "use_bfloat16": false, "tf_legacy_loss": false, "pruned_heads": {}, "tie_word_embeddings": true, "is_encoder_decoder": false, "is_decoder": false, "cross_attention_hidden_size": null, "add_cross_attention": false, "tie_encoder_decoder": false, "max_length": 20, "min_length": 0, "do_sample": false, "early_stopping": false, "num_beams": 1, "num_beam_groups": 1, "diversity_penalty": 0.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "typical_p": 1.0, "repetition_penalty": 1.0, "length_penalty": 1.0, "no_repeat_ngram_size": 0, "encoder_no_repeat_ngram_size": 0, "bad_words_ids": null, "num_return_sequences": 1, "chunk_size_feed_forward": 0, "output_scores": false, "return_dict_in_generate": false, "forced_bos_token_id": null, "forced_eos_token_id": null, "remove_invalid_values": false, "exponential_decay_length_penalty": null, "suppress_tokens": null, "begin_suppress_tokens": null, "architectures": ["GPT2LMHeadModel"], "finetuning_task": null, "id2label": {"0": "LABEL_0", "1": "LABEL_1"}, "label2id": {"LABEL_0": 0, "LABEL_1": 1}, "tokenizer_class": null, "prefix": null, "pad_token_id": null, "sep_token_id": null, "decoder_start_token_id": null, "task_specific_params": null, "problem_type": null, "_name_or_path": "", "transformers_version": "4.25.0.dev0", "n_ctx": 1024, "gradient_checkpointing": 
false, "model_type": "gpt2"}
|
2b855b14boscar/transformers/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f31f90c1b920ad4a8e94bbf5d54dea3fa650c64308ce0b25748acfdb28c433d4
|
3 |
+
size 5903417421
|
2b855b18boscar/transformers/config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"vocab_size": 50304, "n_positions": 2048, "n_embd": 2560, "n_layer": 34, "n_head": 20, "n_inner": 10240, "activation_function": "gelu", "resid_pdrop": 0.1, "embd_pdrop": 0.1, "attn_pdrop": 0.1, "layer_norm_epsilon": 1e-05, "initializer_range": 0.02, "summary_type": "cls_index", "summary_use_proj": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "scale_attn_weights": true, "use_cache": true, "scale_attn_by_inverse_layer_idx": false, "reorder_and_upcast_attn": false, "bos_token_id": 50256, "eos_token_id": 50256, "return_dict": true, "output_hidden_states": false, "output_attentions": false, "torchscript": false, "torch_dtype": null, "use_bfloat16": false, "tf_legacy_loss": false, "pruned_heads": {}, "tie_word_embeddings": true, "is_encoder_decoder": false, "is_decoder": false, "cross_attention_hidden_size": null, "add_cross_attention": false, "tie_encoder_decoder": false, "max_length": 20, "min_length": 0, "do_sample": false, "early_stopping": false, "num_beams": 1, "num_beam_groups": 1, "diversity_penalty": 0.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "typical_p": 1.0, "repetition_penalty": 1.0, "length_penalty": 1.0, "no_repeat_ngram_size": 0, "encoder_no_repeat_ngram_size": 0, "bad_words_ids": null, "num_return_sequences": 1, "chunk_size_feed_forward": 0, "output_scores": false, "return_dict_in_generate": false, "forced_bos_token_id": null, "forced_eos_token_id": null, "remove_invalid_values": false, "exponential_decay_length_penalty": null, "suppress_tokens": null, "begin_suppress_tokens": null, "architectures": ["GPT2LMHeadModel"], "finetuning_task": null, "id2label": {"0": "LABEL_0", "1": "LABEL_1"}, "label2id": {"LABEL_0": 0, "LABEL_1": 1}, "tokenizer_class": null, "prefix": null, "pad_token_id": null, "sep_token_id": null, "decoder_start_token_id": null, "task_specific_params": null, "problem_type": null, "_name_or_path": "", "transformers_version": "4.25.0.dev0", "n_ctx": 1024, "gradient_checkpointing": 
false, "model_type": "gpt2"}
|
2b855b18boscar/transformers/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:25c4d9f92d16773af27d913e462f037d83444e8b933ed631eefc217cd289a7cf
|
3 |
+
size 5903417421
|
2b855b28boscar/transformers/config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"vocab_size": 50304, "n_positions": 2048, "n_embd": 2560, "n_layer": 34, "n_head": 20, "n_inner": 10240, "activation_function": "gelu", "resid_pdrop": 0.1, "embd_pdrop": 0.1, "attn_pdrop": 0.1, "layer_norm_epsilon": 1e-05, "initializer_range": 0.02, "summary_type": "cls_index", "summary_use_proj": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "scale_attn_weights": true, "use_cache": true, "scale_attn_by_inverse_layer_idx": false, "reorder_and_upcast_attn": false, "bos_token_id": 50256, "eos_token_id": 50256, "return_dict": true, "output_hidden_states": false, "output_attentions": false, "torchscript": false, "torch_dtype": null, "use_bfloat16": false, "tf_legacy_loss": false, "pruned_heads": {}, "tie_word_embeddings": true, "is_encoder_decoder": false, "is_decoder": false, "cross_attention_hidden_size": null, "add_cross_attention": false, "tie_encoder_decoder": false, "max_length": 20, "min_length": 0, "do_sample": false, "early_stopping": false, "num_beams": 1, "num_beam_groups": 1, "diversity_penalty": 0.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "typical_p": 1.0, "repetition_penalty": 1.0, "length_penalty": 1.0, "no_repeat_ngram_size": 0, "encoder_no_repeat_ngram_size": 0, "bad_words_ids": null, "num_return_sequences": 1, "chunk_size_feed_forward": 0, "output_scores": false, "return_dict_in_generate": false, "forced_bos_token_id": null, "forced_eos_token_id": null, "remove_invalid_values": false, "exponential_decay_length_penalty": null, "suppress_tokens": null, "begin_suppress_tokens": null, "architectures": ["GPT2LMHeadModel"], "finetuning_task": null, "id2label": {"0": "LABEL_0", "1": "LABEL_1"}, "label2id": {"LABEL_0": 0, "LABEL_1": 1}, "tokenizer_class": null, "prefix": null, "pad_token_id": null, "sep_token_id": null, "decoder_start_token_id": null, "task_specific_params": null, "problem_type": null, "_name_or_path": "", "transformers_version": "4.25.0.dev0", "n_ctx": 1024, "gradient_checkpointing": 
false, "model_type": "gpt2"}
|
2b855b28boscar/transformers/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:35e1c4401632f5102562aa1cac23e464f703274b5c7d3c41c291d0a7a1cf2a10
|
3 |
+
size 5903417421
|
2b855b9boscar/transformers/config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"vocab_size": 50304, "n_positions": 2048, "n_embd": 2560, "n_layer": 34, "n_head": 20, "n_inner": 10240, "activation_function": "gelu", "resid_pdrop": 0.1, "embd_pdrop": 0.1, "attn_pdrop": 0.1, "layer_norm_epsilon": 1e-05, "initializer_range": 0.02, "summary_type": "cls_index", "summary_use_proj": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "scale_attn_weights": true, "use_cache": true, "scale_attn_by_inverse_layer_idx": false, "reorder_and_upcast_attn": false, "bos_token_id": 50256, "eos_token_id": 50256, "return_dict": true, "output_hidden_states": false, "output_attentions": false, "torchscript": false, "torch_dtype": null, "use_bfloat16": false, "tf_legacy_loss": false, "pruned_heads": {}, "tie_word_embeddings": true, "is_encoder_decoder": false, "is_decoder": false, "cross_attention_hidden_size": null, "add_cross_attention": false, "tie_encoder_decoder": false, "max_length": 20, "min_length": 0, "do_sample": false, "early_stopping": false, "num_beams": 1, "num_beam_groups": 1, "diversity_penalty": 0.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "typical_p": 1.0, "repetition_penalty": 1.0, "length_penalty": 1.0, "no_repeat_ngram_size": 0, "encoder_no_repeat_ngram_size": 0, "bad_words_ids": null, "num_return_sequences": 1, "chunk_size_feed_forward": 0, "output_scores": false, "return_dict_in_generate": false, "forced_bos_token_id": null, "forced_eos_token_id": null, "remove_invalid_values": false, "exponential_decay_length_penalty": null, "suppress_tokens": null, "begin_suppress_tokens": null, "architectures": ["GPT2LMHeadModel"], "finetuning_task": null, "id2label": {"0": "LABEL_0", "1": "LABEL_1"}, "label2id": {"LABEL_0": 0, "LABEL_1": 1}, "tokenizer_class": null, "prefix": null, "pad_token_id": null, "sep_token_id": null, "decoder_start_token_id": null, "task_specific_params": null, "problem_type": null, "_name_or_path": "", "transformers_version": "4.25.0.dev0", "n_ctx": 1024, "gradient_checkpointing": 
false, "model_type": "gpt2"}
|
2b855b9boscar/transformers/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:42ef0ea43e0469b507b2ceafb0ac70d2acfc20cf12abb02cfdb16a28536e21ac
|
3 |
+
size 5903417421
|
evaluation/2b855b11boscar/agg.limited=3000.model=2b855b11boscar.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:15.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.5670683683631262, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.03691812963807893}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.11985450006638845, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003997081880385905}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.30534108318007713, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005102257250624603}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.14031806501516378, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0032239768774188983}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.0572753090308458, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0026238936192447566}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.1510776866655993, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0035280470438203}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.06730947418583354, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0021306342031554046}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.10809176961403877, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0035468769595252323}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.2865306498323076, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004705210519435355}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.12788737000248987, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002762237488005244}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.11016029070235552, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003638263346244495}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.2877824706800937, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004695526640864335}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.1297204135381709, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0028458227925484865}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscar-repetitions/2b855b11boscar/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/2b855b11boscar/agg.limited=3000.model=2b855b11boscar.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:16.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.20919650001888335, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003583043509232267}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.17493844549911547, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0026283803509335227}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.1571887541624411, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020375285649514305}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.05054992887208443, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0022911980261077665}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.03448422957087202, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR 
in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001216920370717321}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.031473064403187076, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010575542364599018}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.16528179444564603, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0030935248140823153}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.13383754374979825, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0019789761470973464}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.12068346268863206, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.00154598273316667}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.19791774077694188, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", 
"dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003419633975331504}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.16510303418072297, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002454383341362712}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.14837176714025335, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019049177952382387}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.9393823117358806, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.05782921628778876}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscar-repetitions/2b855b11boscar/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/2b855b11boscar/agg.limited=3000.model=2b855b11boscar.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:16.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 10.987268211526285, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.14130149844145432}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5357525639515004, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0032064395910662274}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.4180010693979301, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0030066188289859246}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.44393084664082105, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0023662235864518972}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.24804399299580723, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0025702148050733026}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.1915388958648696, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0021539680976962266}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.2032453380877121, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001996627917318235}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.39258238338070933, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0028657446917592097}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3032919923656747, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024364322943428974}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.3230020985473621, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0020587720965347047}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.4385832647435947, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0031247160846557594}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.34043638987624125, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0027386004032157202}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.362241008064181, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.00232603623141771}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscar-repetitions/2b855b11boscar/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/2b855b11boscar/agg.limited=3000.model=2b855b11boscar.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:16.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.2019256154313508, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003580048253845401}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.2600410618635924, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004208330764923628}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.20961994465841852, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0030523713872056104}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.040750189353702726, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002006592574380931}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.05255991583937774, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| 
{{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0022881459816807043}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.04207453889049326, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0018645507401809042}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.15214429231215956, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0029090529169949583}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.19285800670374653, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003181909555481545}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.156559567996843, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0024098196817411285}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.15687371596872893, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": 
"a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002917428527650363}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.20340928717747195, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003556071653948259}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.16288834178745557, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0025042802706722427}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.8418233784724891, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.10153846480046211}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscar-repetitions/2b855b11boscar/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/2b855b11boscar/examples.limited=3000.model=2b855b11boscar.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:15.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
|
|
evaluation/2b855b11boscar/examples.limited=3000.model=2b855b11boscar.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:16.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d46304ce75a05829a5f59561e0d6c585c41e823ab51cb3b847a8670cc20b9706
|
3 |
+
size 13001176
|
evaluation/2b855b11boscar/examples.limited=3000.model=2b855b11boscar.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:16.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
|
|
evaluation/2b855b11boscar/examples.limited=3000.model=2b855b11boscar.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:16.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
|
|
evaluation/2b855b11boscar/merged.csv
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
dataset,prompt,metric,value
|
2 |
+
e2e_nlg_cleaned,generate_text_restaurant,rouge2_fmeasure,0.2032453380877121
|
3 |
+
gem_xsum,article_DOC_summary,rouge2_fmeasure,0.04207453889049326
|
4 |
+
web_nlg_en,PALM_prompt,rouge2_fmeasure,0.06730947418583354
|
5 |
+
wiki_lingua_en,tldr_en,rouge2_fmeasure,0.031473064403187076
|
evaluation/2b855b11boscar/merged.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"GEM/web_nlg_en": {"PALM_prompt": {"bleu": 0.5670683683631262, "bleu_stderr": 0.03691812963807893, "rouge1_fmeasure": 0.14031806501516378, "rouge1_fmeasure_stderr": 0.0032239768774188983, "rouge1_precision": 0.11985450006638845, "rouge1_precision_stderr": 0.003997081880385905, "rouge1_recall": 0.30534108318007713, "rouge1_recall_stderr": 0.005102257250624603, "rouge2_fmeasure": 0.06730947418583354, "rouge2_fmeasure_stderr": 0.0021306342031554046, "rouge2_precision": 0.0572753090308458, "rouge2_precision_stderr": 0.0026238936192447566, "rouge2_recall": 0.1510776866655993, "rouge2_recall_stderr": 0.0035280470438203, "rougeL_fmeasure": 0.12788737000248987, "rougeL_fmeasure_stderr": 0.002762237488005244, "rougeL_precision": 0.10809176961403877, "rougeL_precision_stderr": 0.0035468769595252323, "rougeL_recall": 0.2865306498323076, "rougeL_recall_stderr": 0.004705210519435355, "rougeLsum_fmeasure": 0.1297204135381709, "rougeLsum_fmeasure_stderr": 0.0028458227925484865, "rougeLsum_precision": 0.11016029070235552, "rougeLsum_precision_stderr": 0.003638263346244495, "rougeLsum_recall": 0.2877824706800937, "rougeLsum_recall_stderr": 0.004695526640864335}}, "GEM/wiki_lingua_en": {"tldr_en": {"bleu": 1.9393823117358806, "bleu_stderr": 0.05782921628778876, "rouge1_fmeasure": 0.1571887541624411, "rouge1_fmeasure_stderr": 0.0020375285649514305, "rouge1_precision": 0.20919650001888335, "rouge1_precision_stderr": 0.003583043509232267, "rouge1_recall": 0.17493844549911547, "rouge1_recall_stderr": 0.0026283803509335227, "rouge2_fmeasure": 0.031473064403187076, "rouge2_fmeasure_stderr": 0.0010575542364599018, "rouge2_precision": 0.05054992887208443, "rouge2_precision_stderr": 0.0022911980261077665, "rouge2_recall": 0.03448422957087202, "rouge2_recall_stderr": 0.001216920370717321, "rougeL_fmeasure": 0.12068346268863206, "rougeL_fmeasure_stderr": 0.00154598273316667, "rougeL_precision": 0.16528179444564603, "rougeL_precision_stderr": 0.0030935248140823153, "rougeL_recall": 
0.13383754374979825, "rougeL_recall_stderr": 0.0019789761470973464, "rougeLsum_fmeasure": 0.14837176714025335, "rougeLsum_fmeasure_stderr": 0.0019049177952382387, "rougeLsum_precision": 0.19791774077694188, "rougeLsum_precision_stderr": 0.003419633975331504, "rougeLsum_recall": 0.16510303418072297, "rougeLsum_recall_stderr": 0.002454383341362712}}, "e2e_nlg_cleaned": {"generate_text_restaurant": {"bleu": 10.987268211526285, "bleu_stderr": 0.14130149844145432, "rouge1_fmeasure": 0.44393084664082105, "rouge1_fmeasure_stderr": 0.0023662235864518972, "rouge1_precision": 0.5357525639515004, "rouge1_precision_stderr": 0.0032064395910662274, "rouge1_recall": 0.4180010693979301, "rouge1_recall_stderr": 0.0030066188289859246, "rouge2_fmeasure": 0.2032453380877121, "rouge2_fmeasure_stderr": 0.001996627917318235, "rouge2_precision": 0.24804399299580723, "rouge2_precision_stderr": 0.0025702148050733026, "rouge2_recall": 0.1915388958648696, "rouge2_recall_stderr": 0.0021539680976962266, "rougeL_fmeasure": 0.3230020985473621, "rougeL_fmeasure_stderr": 0.0020587720965347047, "rougeL_precision": 0.39258238338070933, "rougeL_precision_stderr": 0.0028657446917592097, "rougeL_recall": 0.3032919923656747, "rougeL_recall_stderr": 0.0024364322943428974, "rougeLsum_fmeasure": 0.362241008064181, "rougeLsum_fmeasure_stderr": 0.00232603623141771, "rougeLsum_precision": 0.4385832647435947, "rougeLsum_precision_stderr": 0.0031247160846557594, "rougeLsum_recall": 0.34043638987624125, "rougeLsum_recall_stderr": 0.0027386004032157202}}, "gem_xsum": {"article_DOC_summary": {"bleu": 1.8418233784724891, "bleu_stderr": 0.10153846480046211, "rouge1_fmeasure": 0.20961994465841852, "rouge1_fmeasure_stderr": 0.0030523713872056104, "rouge1_precision": 0.2019256154313508, "rouge1_precision_stderr": 0.003580048253845401, "rouge1_recall": 0.2600410618635924, "rouge1_recall_stderr": 0.004208330764923628, "rouge2_fmeasure": 0.04207453889049326, "rouge2_fmeasure_stderr": 0.0018645507401809042, 
"rouge2_precision": 0.040750189353702726, "rouge2_precision_stderr": 0.002006592574380931, "rouge2_recall": 0.05255991583937774, "rouge2_recall_stderr": 0.0022881459816807043, "rougeL_fmeasure": 0.156559567996843, "rougeL_fmeasure_stderr": 0.0024098196817411285, "rougeL_precision": 0.15214429231215956, "rougeL_precision_stderr": 0.0029090529169949583, "rougeL_recall": 0.19285800670374653, "rougeL_recall_stderr": 0.003181909555481545, "rougeLsum_fmeasure": 0.16288834178745557, "rougeLsum_fmeasure_stderr": 0.0025042802706722427, "rougeLsum_precision": 0.15687371596872893, "rougeLsum_precision_stderr": 0.002917428527650363, "rougeLsum_recall": 0.20340928717747195, "rougeLsum_recall_stderr": 0.003556071653948259}}}
|
evaluation/2b855b11boscar/slim.limited=3000.model=2b855b11boscar.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:15.json
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": [
|
3 |
+
{
|
4 |
+
"task_name": "GEM/web_nlg_en",
|
5 |
+
"prompt_name": "PALM_prompt",
|
6 |
+
"bleu": 0.5670683683631262,
|
7 |
+
"dataset_path": "GEM/web_nlg",
|
8 |
+
"dataset_name": "en",
|
9 |
+
"subset": null,
|
10 |
+
"bleu_stderr": 0.03691812963807893
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"task_name": "GEM/web_nlg_en",
|
14 |
+
"prompt_name": "PALM_prompt",
|
15 |
+
"rouge1_precision": 0.11985450006638845,
|
16 |
+
"dataset_path": "GEM/web_nlg",
|
17 |
+
"dataset_name": "en",
|
18 |
+
"subset": null,
|
19 |
+
"rouge1_precision_stderr": 0.003997081880385905
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"task_name": "GEM/web_nlg_en",
|
23 |
+
"prompt_name": "PALM_prompt",
|
24 |
+
"rouge1_recall": 0.30534108318007713,
|
25 |
+
"dataset_path": "GEM/web_nlg",
|
26 |
+
"dataset_name": "en",
|
27 |
+
"subset": null,
|
28 |
+
"rouge1_recall_stderr": 0.005102257250624603
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"task_name": "GEM/web_nlg_en",
|
32 |
+
"prompt_name": "PALM_prompt",
|
33 |
+
"rouge1_fmeasure": 0.14031806501516378,
|
34 |
+
"dataset_path": "GEM/web_nlg",
|
35 |
+
"dataset_name": "en",
|
36 |
+
"subset": null,
|
37 |
+
"rouge1_fmeasure_stderr": 0.0032239768774188983
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"task_name": "GEM/web_nlg_en",
|
41 |
+
"prompt_name": "PALM_prompt",
|
42 |
+
"rouge2_precision": 0.0572753090308458,
|
43 |
+
"dataset_path": "GEM/web_nlg",
|
44 |
+
"dataset_name": "en",
|
45 |
+
"subset": null,
|
46 |
+
"rouge2_precision_stderr": 0.0026238936192447566
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"task_name": "GEM/web_nlg_en",
|
50 |
+
"prompt_name": "PALM_prompt",
|
51 |
+
"rouge2_recall": 0.1510776866655993,
|
52 |
+
"dataset_path": "GEM/web_nlg",
|
53 |
+
"dataset_name": "en",
|
54 |
+
"subset": null,
|
55 |
+
"rouge2_recall_stderr": 0.0035280470438203
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"task_name": "GEM/web_nlg_en",
|
59 |
+
"prompt_name": "PALM_prompt",
|
60 |
+
"rouge2_fmeasure": 0.06730947418583354,
|
61 |
+
"dataset_path": "GEM/web_nlg",
|
62 |
+
"dataset_name": "en",
|
63 |
+
"subset": null,
|
64 |
+
"rouge2_fmeasure_stderr": 0.0021306342031554046
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"task_name": "GEM/web_nlg_en",
|
68 |
+
"prompt_name": "PALM_prompt",
|
69 |
+
"rougeL_precision": 0.10809176961403877,
|
70 |
+
"dataset_path": "GEM/web_nlg",
|
71 |
+
"dataset_name": "en",
|
72 |
+
"subset": null,
|
73 |
+
"rougeL_precision_stderr": 0.0035468769595252323
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"task_name": "GEM/web_nlg_en",
|
77 |
+
"prompt_name": "PALM_prompt",
|
78 |
+
"rougeL_recall": 0.2865306498323076,
|
79 |
+
"dataset_path": "GEM/web_nlg",
|
80 |
+
"dataset_name": "en",
|
81 |
+
"subset": null,
|
82 |
+
"rougeL_recall_stderr": 0.004705210519435355
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"task_name": "GEM/web_nlg_en",
|
86 |
+
"prompt_name": "PALM_prompt",
|
87 |
+
"rougeL_fmeasure": 0.12788737000248987,
|
88 |
+
"dataset_path": "GEM/web_nlg",
|
89 |
+
"dataset_name": "en",
|
90 |
+
"subset": null,
|
91 |
+
"rougeL_fmeasure_stderr": 0.002762237488005244
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"task_name": "GEM/web_nlg_en",
|
95 |
+
"prompt_name": "PALM_prompt",
|
96 |
+
"rougeLsum_precision": 0.11016029070235552,
|
97 |
+
"dataset_path": "GEM/web_nlg",
|
98 |
+
"dataset_name": "en",
|
99 |
+
"subset": null,
|
100 |
+
"rougeLsum_precision_stderr": 0.003638263346244495
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"task_name": "GEM/web_nlg_en",
|
104 |
+
"prompt_name": "PALM_prompt",
|
105 |
+
"rougeLsum_recall": 0.2877824706800937,
|
106 |
+
"dataset_path": "GEM/web_nlg",
|
107 |
+
"dataset_name": "en",
|
108 |
+
"subset": null,
|
109 |
+
"rougeLsum_recall_stderr": 0.004695526640864335
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"task_name": "GEM/web_nlg_en",
|
113 |
+
"prompt_name": "PALM_prompt",
|
114 |
+
"rougeLsum_fmeasure": 0.1297204135381709,
|
115 |
+
"dataset_path": "GEM/web_nlg",
|
116 |
+
"dataset_name": "en",
|
117 |
+
"subset": null,
|
118 |
+
"rougeLsum_fmeasure_stderr": 0.0028458227925484865
|
119 |
+
}
|
120 |
+
],
|
121 |
+
"config": {
|
122 |
+
"model": "hf-causal",
|
123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscar-repetitions/2b855b11boscar/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
124 |
+
"task_args": "",
|
125 |
+
"num_fewshot": 1,
|
126 |
+
"batch_size": 16,
|
127 |
+
"device": "cuda",
|
128 |
+
"use_cache": false,
|
129 |
+
"limit": 3000,
|
130 |
+
"bootstrap_iters": 10,
|
131 |
+
"seed": 1234
|
132 |
+
}
|
133 |
+
}
|
evaluation/2b855b11boscar/slim.limited=3000.model=2b855b11boscar.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:16.json
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": [
|
3 |
+
{
|
4 |
+
"task_name": "GEM/wiki_lingua_en",
|
5 |
+
"prompt_name": "tldr_en",
|
6 |
+
"rouge1_precision": 0.20919650001888335,
|
7 |
+
"dataset_path": "GEM/wiki_lingua",
|
8 |
+
"dataset_name": "en",
|
9 |
+
"subset": null,
|
10 |
+
"rouge1_precision_stderr": 0.003583043509232267
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"task_name": "GEM/wiki_lingua_en",
|
14 |
+
"prompt_name": "tldr_en",
|
15 |
+
"rouge1_recall": 0.17493844549911547,
|
16 |
+
"dataset_path": "GEM/wiki_lingua",
|
17 |
+
"dataset_name": "en",
|
18 |
+
"subset": null,
|
19 |
+
"rouge1_recall_stderr": 0.0026283803509335227
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"task_name": "GEM/wiki_lingua_en",
|
23 |
+
"prompt_name": "tldr_en",
|
24 |
+
"rouge1_fmeasure": 0.1571887541624411,
|
25 |
+
"dataset_path": "GEM/wiki_lingua",
|
26 |
+
"dataset_name": "en",
|
27 |
+
"subset": null,
|
28 |
+
"rouge1_fmeasure_stderr": 0.0020375285649514305
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"task_name": "GEM/wiki_lingua_en",
|
32 |
+
"prompt_name": "tldr_en",
|
33 |
+
"rouge2_precision": 0.05054992887208443,
|
34 |
+
"dataset_path": "GEM/wiki_lingua",
|
35 |
+
"dataset_name": "en",
|
36 |
+
"subset": null,
|
37 |
+
"rouge2_precision_stderr": 0.0022911980261077665
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"task_name": "GEM/wiki_lingua_en",
|
41 |
+
"prompt_name": "tldr_en",
|
42 |
+
"rouge2_recall": 0.03448422957087202,
|
43 |
+
"dataset_path": "GEM/wiki_lingua",
|
44 |
+
"dataset_name": "en",
|
45 |
+
"subset": null,
|
46 |
+
"rouge2_recall_stderr": 0.001216920370717321
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"task_name": "GEM/wiki_lingua_en",
|
50 |
+
"prompt_name": "tldr_en",
|
51 |
+
"rouge2_fmeasure": 0.031473064403187076,
|
52 |
+
"dataset_path": "GEM/wiki_lingua",
|
53 |
+
"dataset_name": "en",
|
54 |
+
"subset": null,
|
55 |
+
"rouge2_fmeasure_stderr": 0.0010575542364599018
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"task_name": "GEM/wiki_lingua_en",
|
59 |
+
"prompt_name": "tldr_en",
|
60 |
+
"rougeL_precision": 0.16528179444564603,
|
61 |
+
"dataset_path": "GEM/wiki_lingua",
|
62 |
+
"dataset_name": "en",
|
63 |
+
"subset": null,
|
64 |
+
"rougeL_precision_stderr": 0.0030935248140823153
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"task_name": "GEM/wiki_lingua_en",
|
68 |
+
"prompt_name": "tldr_en",
|
69 |
+
"rougeL_recall": 0.13383754374979825,
|
70 |
+
"dataset_path": "GEM/wiki_lingua",
|
71 |
+
"dataset_name": "en",
|
72 |
+
"subset": null,
|
73 |
+
"rougeL_recall_stderr": 0.0019789761470973464
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"task_name": "GEM/wiki_lingua_en",
|
77 |
+
"prompt_name": "tldr_en",
|
78 |
+
"rougeL_fmeasure": 0.12068346268863206,
|
79 |
+
"dataset_path": "GEM/wiki_lingua",
|
80 |
+
"dataset_name": "en",
|
81 |
+
"subset": null,
|
82 |
+
"rougeL_fmeasure_stderr": 0.00154598273316667
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"task_name": "GEM/wiki_lingua_en",
|
86 |
+
"prompt_name": "tldr_en",
|
87 |
+
"rougeLsum_precision": 0.19791774077694188,
|
88 |
+
"dataset_path": "GEM/wiki_lingua",
|
89 |
+
"dataset_name": "en",
|
90 |
+
"subset": null,
|
91 |
+
"rougeLsum_precision_stderr": 0.003419633975331504
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"task_name": "GEM/wiki_lingua_en",
|
95 |
+
"prompt_name": "tldr_en",
|
96 |
+
"rougeLsum_recall": 0.16510303418072297,
|
97 |
+
"dataset_path": "GEM/wiki_lingua",
|
98 |
+
"dataset_name": "en",
|
99 |
+
"subset": null,
|
100 |
+
"rougeLsum_recall_stderr": 0.002454383341362712
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"task_name": "GEM/wiki_lingua_en",
|
104 |
+
"prompt_name": "tldr_en",
|
105 |
+
"rougeLsum_fmeasure": 0.14837176714025335,
|
106 |
+
"dataset_path": "GEM/wiki_lingua",
|
107 |
+
"dataset_name": "en",
|
108 |
+
"subset": null,
|
109 |
+
"rougeLsum_fmeasure_stderr": 0.0019049177952382387
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"task_name": "GEM/wiki_lingua_en",
|
113 |
+
"prompt_name": "tldr_en",
|
114 |
+
"bleu": 1.9393823117358806,
|
115 |
+
"dataset_path": "GEM/wiki_lingua",
|
116 |
+
"dataset_name": "en",
|
117 |
+
"subset": null,
|
118 |
+
"bleu_stderr": 0.05782921628778876
|
119 |
+
}
|
120 |
+
],
|
121 |
+
"config": {
|
122 |
+
"model": "hf-causal",
|
123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscar-repetitions/2b855b11boscar/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
124 |
+
"task_args": "",
|
125 |
+
"num_fewshot": 1,
|
126 |
+
"batch_size": 16,
|
127 |
+
"device": "cuda",
|
128 |
+
"use_cache": false,
|
129 |
+
"limit": 3000,
|
130 |
+
"bootstrap_iters": 10,
|
131 |
+
"seed": 1234
|
132 |
+
}
|
133 |
+
}
|
evaluation/2b855b11boscar/slim.limited=3000.model=2b855b11boscar.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:16.json
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": [
|
3 |
+
{
|
4 |
+
"task_name": "e2e_nlg_cleaned",
|
5 |
+
"prompt_name": "generate_text_restaurant",
|
6 |
+
"bleu": 10.987268211526285,
|
7 |
+
"dataset_path": "e2e_nlg_cleaned",
|
8 |
+
"dataset_name": null,
|
9 |
+
"subset": null,
|
10 |
+
"bleu_stderr": 0.14130149844145432
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"task_name": "e2e_nlg_cleaned",
|
14 |
+
"prompt_name": "generate_text_restaurant",
|
15 |
+
"rouge1_precision": 0.5357525639515004,
|
16 |
+
"dataset_path": "e2e_nlg_cleaned",
|
17 |
+
"dataset_name": null,
|
18 |
+
"subset": null,
|
19 |
+
"rouge1_precision_stderr": 0.0032064395910662274
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"task_name": "e2e_nlg_cleaned",
|
23 |
+
"prompt_name": "generate_text_restaurant",
|
24 |
+
"rouge1_recall": 0.4180010693979301,
|
25 |
+
"dataset_path": "e2e_nlg_cleaned",
|
26 |
+
"dataset_name": null,
|
27 |
+
"subset": null,
|
28 |
+
"rouge1_recall_stderr": 0.0030066188289859246
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"task_name": "e2e_nlg_cleaned",
|
32 |
+
"prompt_name": "generate_text_restaurant",
|
33 |
+
"rouge1_fmeasure": 0.44393084664082105,
|
34 |
+
"dataset_path": "e2e_nlg_cleaned",
|
35 |
+
"dataset_name": null,
|
36 |
+
"subset": null,
|
37 |
+
"rouge1_fmeasure_stderr": 0.0023662235864518972
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"task_name": "e2e_nlg_cleaned",
|
41 |
+
"prompt_name": "generate_text_restaurant",
|
42 |
+
"rouge2_precision": 0.24804399299580723,
|
43 |
+
"dataset_path": "e2e_nlg_cleaned",
|
44 |
+
"dataset_name": null,
|
45 |
+
"subset": null,
|
46 |
+
"rouge2_precision_stderr": 0.0025702148050733026
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"task_name": "e2e_nlg_cleaned",
|
50 |
+
"prompt_name": "generate_text_restaurant",
|
51 |
+
"rouge2_recall": 0.1915388958648696,
|
52 |
+
"dataset_path": "e2e_nlg_cleaned",
|
53 |
+
"dataset_name": null,
|
54 |
+
"subset": null,
|
55 |
+
"rouge2_recall_stderr": 0.0021539680976962266
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"task_name": "e2e_nlg_cleaned",
|
59 |
+
"prompt_name": "generate_text_restaurant",
|
60 |
+
"rouge2_fmeasure": 0.2032453380877121,
|
61 |
+
"dataset_path": "e2e_nlg_cleaned",
|
62 |
+
"dataset_name": null,
|
63 |
+
"subset": null,
|
64 |
+
"rouge2_fmeasure_stderr": 0.001996627917318235
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"task_name": "e2e_nlg_cleaned",
|
68 |
+
"prompt_name": "generate_text_restaurant",
|
69 |
+
"rougeL_precision": 0.39258238338070933,
|
70 |
+
"dataset_path": "e2e_nlg_cleaned",
|
71 |
+
"dataset_name": null,
|
72 |
+
"subset": null,
|
73 |
+
"rougeL_precision_stderr": 0.0028657446917592097
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"task_name": "e2e_nlg_cleaned",
|
77 |
+
"prompt_name": "generate_text_restaurant",
|
78 |
+
"rougeL_recall": 0.3032919923656747,
|
79 |
+
"dataset_path": "e2e_nlg_cleaned",
|
80 |
+
"dataset_name": null,
|
81 |
+
"subset": null,
|
82 |
+
"rougeL_recall_stderr": 0.0024364322943428974
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"task_name": "e2e_nlg_cleaned",
|
86 |
+
"prompt_name": "generate_text_restaurant",
|
87 |
+
"rougeL_fmeasure": 0.3230020985473621,
|
88 |
+
"dataset_path": "e2e_nlg_cleaned",
|
89 |
+
"dataset_name": null,
|
90 |
+
"subset": null,
|
91 |
+
"rougeL_fmeasure_stderr": 0.0020587720965347047
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"task_name": "e2e_nlg_cleaned",
|
95 |
+
"prompt_name": "generate_text_restaurant",
|
96 |
+
"rougeLsum_precision": 0.4385832647435947,
|
97 |
+
"dataset_path": "e2e_nlg_cleaned",
|
98 |
+
"dataset_name": null,
|
99 |
+
"subset": null,
|
100 |
+
"rougeLsum_precision_stderr": 0.0031247160846557594
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"task_name": "e2e_nlg_cleaned",
|
104 |
+
"prompt_name": "generate_text_restaurant",
|
105 |
+
"rougeLsum_recall": 0.34043638987624125,
|
106 |
+
"dataset_path": "e2e_nlg_cleaned",
|
107 |
+
"dataset_name": null,
|
108 |
+
"subset": null,
|
109 |
+
"rougeLsum_recall_stderr": 0.0027386004032157202
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"task_name": "e2e_nlg_cleaned",
|
113 |
+
"prompt_name": "generate_text_restaurant",
|
114 |
+
"rougeLsum_fmeasure": 0.362241008064181,
|
115 |
+
"dataset_path": "e2e_nlg_cleaned",
|
116 |
+
"dataset_name": null,
|
117 |
+
"subset": null,
|
118 |
+
"rougeLsum_fmeasure_stderr": 0.00232603623141771
|
119 |
+
}
|
120 |
+
],
|
121 |
+
"config": {
|
122 |
+
"model": "hf-causal",
|
123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscar-repetitions/2b855b11boscar/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
124 |
+
"task_args": "",
|
125 |
+
"num_fewshot": 1,
|
126 |
+
"batch_size": 16,
|
127 |
+
"device": "cuda",
|
128 |
+
"use_cache": false,
|
129 |
+
"limit": 3000,
|
130 |
+
"bootstrap_iters": 10,
|
131 |
+
"seed": 1234
|
132 |
+
}
|
133 |
+
}
|
evaluation/2b855b11boscar/slim.limited=3000.model=2b855b11boscar.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:16.json
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": [
|
3 |
+
{
|
4 |
+
"task_name": "gem_xsum",
|
5 |
+
"prompt_name": "article_DOC_summary",
|
6 |
+
"rouge1_precision": 0.2019256154313508,
|
7 |
+
"dataset_path": "GEM/xsum",
|
8 |
+
"dataset_name": null,
|
9 |
+
"subset": "",
|
10 |
+
"rouge1_precision_stderr": 0.003580048253845401
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"task_name": "gem_xsum",
|
14 |
+
"prompt_name": "article_DOC_summary",
|
15 |
+
"rouge1_recall": 0.2600410618635924,
|
16 |
+
"dataset_path": "GEM/xsum",
|
17 |
+
"dataset_name": null,
|
18 |
+
"subset": "",
|
19 |
+
"rouge1_recall_stderr": 0.004208330764923628
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"task_name": "gem_xsum",
|
23 |
+
"prompt_name": "article_DOC_summary",
|
24 |
+
"rouge1_fmeasure": 0.20961994465841852,
|
25 |
+
"dataset_path": "GEM/xsum",
|
26 |
+
"dataset_name": null,
|
27 |
+
"subset": "",
|
28 |
+
"rouge1_fmeasure_stderr": 0.0030523713872056104
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"task_name": "gem_xsum",
|
32 |
+
"prompt_name": "article_DOC_summary",
|
33 |
+
"rouge2_precision": 0.040750189353702726,
|
34 |
+
"dataset_path": "GEM/xsum",
|
35 |
+
"dataset_name": null,
|
36 |
+
"subset": "",
|
37 |
+
"rouge2_precision_stderr": 0.002006592574380931
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"task_name": "gem_xsum",
|
41 |
+
"prompt_name": "article_DOC_summary",
|
42 |
+
"rouge2_recall": 0.05255991583937774,
|
43 |
+
"dataset_path": "GEM/xsum",
|
44 |
+
"dataset_name": null,
|
45 |
+
"subset": "",
|
46 |
+
"rouge2_recall_stderr": 0.0022881459816807043
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"task_name": "gem_xsum",
|
50 |
+
"prompt_name": "article_DOC_summary",
|
51 |
+
"rouge2_fmeasure": 0.04207453889049326,
|
52 |
+
"dataset_path": "GEM/xsum",
|
53 |
+
"dataset_name": null,
|
54 |
+
"subset": "",
|
55 |
+
"rouge2_fmeasure_stderr": 0.0018645507401809042
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"task_name": "gem_xsum",
|
59 |
+
"prompt_name": "article_DOC_summary",
|
60 |
+
"rougeL_precision": 0.15214429231215956,
|
61 |
+
"dataset_path": "GEM/xsum",
|
62 |
+
"dataset_name": null,
|
63 |
+
"subset": "",
|
64 |
+
"rougeL_precision_stderr": 0.0029090529169949583
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"task_name": "gem_xsum",
|
68 |
+
"prompt_name": "article_DOC_summary",
|
69 |
+
"rougeL_recall": 0.19285800670374653,
|
70 |
+
"dataset_path": "GEM/xsum",
|
71 |
+
"dataset_name": null,
|
72 |
+
"subset": "",
|
73 |
+
"rougeL_recall_stderr": 0.003181909555481545
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"task_name": "gem_xsum",
|
77 |
+
"prompt_name": "article_DOC_summary",
|
78 |
+
"rougeL_fmeasure": 0.156559567996843,
|
79 |
+
"dataset_path": "GEM/xsum",
|
80 |
+
"dataset_name": null,
|
81 |
+
"subset": "",
|
82 |
+
"rougeL_fmeasure_stderr": 0.0024098196817411285
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"task_name": "gem_xsum",
|
86 |
+
"prompt_name": "article_DOC_summary",
|
87 |
+
"rougeLsum_precision": 0.15687371596872893,
|
88 |
+
"dataset_path": "GEM/xsum",
|
89 |
+
"dataset_name": null,
|
90 |
+
"subset": "",
|
91 |
+
"rougeLsum_precision_stderr": 0.002917428527650363
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"task_name": "gem_xsum",
|
95 |
+
"prompt_name": "article_DOC_summary",
|
96 |
+
"rougeLsum_recall": 0.20340928717747195,
|
97 |
+
"dataset_path": "GEM/xsum",
|
98 |
+
"dataset_name": null,
|
99 |
+
"subset": "",
|
100 |
+
"rougeLsum_recall_stderr": 0.003556071653948259
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"task_name": "gem_xsum",
|
104 |
+
"prompt_name": "article_DOC_summary",
|
105 |
+
"rougeLsum_fmeasure": 0.16288834178745557,
|
106 |
+
"dataset_path": "GEM/xsum",
|
107 |
+
"dataset_name": null,
|
108 |
+
"subset": "",
|
109 |
+
"rougeLsum_fmeasure_stderr": 0.0025042802706722427
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"task_name": "gem_xsum",
|
113 |
+
"prompt_name": "article_DOC_summary",
|
114 |
+
"bleu": 1.8418233784724891,
|
115 |
+
"dataset_path": "GEM/xsum",
|
116 |
+
"dataset_name": null,
|
117 |
+
"subset": "",
|
118 |
+
"bleu_stderr": 0.10153846480046211
|
119 |
+
}
|
120 |
+
],
|
121 |
+
"config": {
|
122 |
+
"model": "hf-causal",
|
123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscar-repetitions/2b855b11boscar/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
124 |
+
"task_args": "",
|
125 |
+
"num_fewshot": 1,
|
126 |
+
"batch_size": 16,
|
127 |
+
"device": "cuda",
|
128 |
+
"use_cache": false,
|
129 |
+
"limit": 3000,
|
130 |
+
"bootstrap_iters": 10,
|
131 |
+
"seed": 1234
|
132 |
+
}
|
133 |
+
}
|
evaluation/2b855b14boscar/agg.limited=3000.model=2b855b14boscar.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:09.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.5267997188559879, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.026931356911439927}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.12909089328831622, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004066682179892196}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.30569567033986794, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0049742166850709245}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.14833814248028368, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0032956014234110094}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.06263849235788414, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0027648776425884146}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.15171838675716612, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003410889677834993}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.07091121401877826, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.002122312057777099}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.11668156581037376, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003635933676657071}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.2860888776540851, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004611942130409851}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.13505528369086345, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0028479908820402133}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.1195722732203613, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0037541107010187424}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.28914382725268084, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0046481719922350055}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.1377662390260723, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0029377080382907077}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscar-repetitions/2b855b14boscar/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/2b855b14boscar/agg.limited=3000.model=2b855b14boscar.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:09.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.25514812406708015, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004391496180067704}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.17001950752336809, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002670040897065488}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.161637224440634, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020878980153854793}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.07353758394638825, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0030516615295547145}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.03669382694391697, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR 
in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0012757094070944904}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.03612213684240494, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011871778135969023}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.20823727124359004, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003933686177209375}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.13155240237460783, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002051213056740241}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.12615501327244175, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016372299243071833}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.2415697060908809, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": 
"en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.004234537804910785}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.15998696145503252, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0024973134235495914}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.15204551717535292, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019490519692789536}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.8209281343094799, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0674388661640857}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscar-repetitions/2b855b14boscar/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/2b855b14boscar/agg.limited=3000.model=2b855b14boscar.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:09.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 11.667822101646196, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.16435225231530254}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5369015967281372, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003160704920883095}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.43015500314289395, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0029641980031177677}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.4522813670567056, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0023179656743458677}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.25208635210451685, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0025699638882073126}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.19950887263978362, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002160714259051888}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.20989199851919094, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0020081610649898235}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.3915161586108937, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002837201135072685}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.31091782219175945, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002429509359228759}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.32777555984149576, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0020561131722717225}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.43989846755746614, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0030882286677311596}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.3506965032863899, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0027274058418149095}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.3693672974349211, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002300533345597634}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscar-repetitions/2b855b14boscar/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/2b855b14boscar/agg.limited=3000.model=2b855b14boscar.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:09.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.21292416056936542, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003721805621375851}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.26407363786023624, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004109965897647065}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.21871796538916172, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.003190710417901308}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.0459430535044871, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0020847588555945942}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.057194429918881434, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| 
{{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0023823758287989474}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.04707689967671619, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0019586543182300385}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.15958948191719216, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0029061613514662667}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.19892295961191558, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0032151265898104505}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.16392408526856198, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0024692316675247704}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.16065451543473253, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": 
"a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0029222291764312643}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.20102060582232475, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003333569558693489}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.16531795284794631, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002517788841816834}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 2.1787998041613217, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.07216311362915621}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscar-repetitions/2b855b14boscar/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/2b855b14boscar/examples.limited=3000.model=2b855b14boscar.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:09.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
|
|
evaluation/2b855b14boscar/examples.limited=3000.model=2b855b14boscar.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:09.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c20b2d57ca392573d96b952bcc2a0556d913476d75ab5e84dadc5fee9772d5ca
|
3 |
+
size 12940583
|
evaluation/2b855b14boscar/examples.limited=3000.model=2b855b14boscar.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:09.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
|
|
evaluation/2b855b14boscar/examples.limited=3000.model=2b855b14boscar.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:09.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
|
|
evaluation/2b855b14boscar/merged.csv
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
dataset,prompt,metric,value
|
2 |
+
e2e_nlg_cleaned,generate_text_restaurant,rouge2_fmeasure,0.20989199851919094
|
3 |
+
gem_xsum,article_DOC_summary,rouge2_fmeasure,0.04707689967671619
|
4 |
+
web_nlg_en,PALM_prompt,rouge2_fmeasure,0.07091121401877826
|
5 |
+
wiki_lingua_en,tldr_en,rouge2_fmeasure,0.03612213684240494
|
evaluation/2b855b14boscar/merged.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"GEM/web_nlg_en": {"PALM_prompt": {"bleu": 0.5267997188559879, "bleu_stderr": 0.026931356911439927, "rouge1_fmeasure": 0.14833814248028368, "rouge1_fmeasure_stderr": 0.0032956014234110094, "rouge1_precision": 0.12909089328831622, "rouge1_precision_stderr": 0.004066682179892196, "rouge1_recall": 0.30569567033986794, "rouge1_recall_stderr": 0.0049742166850709245, "rouge2_fmeasure": 0.07091121401877826, "rouge2_fmeasure_stderr": 0.002122312057777099, "rouge2_precision": 0.06263849235788414, "rouge2_precision_stderr": 0.0027648776425884146, "rouge2_recall": 0.15171838675716612, "rouge2_recall_stderr": 0.003410889677834993, "rougeL_fmeasure": 0.13505528369086345, "rougeL_fmeasure_stderr": 0.0028479908820402133, "rougeL_precision": 0.11668156581037376, "rougeL_precision_stderr": 0.003635933676657071, "rougeL_recall": 0.2860888776540851, "rougeL_recall_stderr": 0.004611942130409851, "rougeLsum_fmeasure": 0.1377662390260723, "rougeLsum_fmeasure_stderr": 0.0029377080382907077, "rougeLsum_precision": 0.1195722732203613, "rougeLsum_precision_stderr": 0.0037541107010187424, "rougeLsum_recall": 0.28914382725268084, "rougeLsum_recall_stderr": 0.0046481719922350055}}, "GEM/wiki_lingua_en": {"tldr_en": {"bleu": 1.8209281343094799, "bleu_stderr": 0.0674388661640857, "rouge1_fmeasure": 0.161637224440634, "rouge1_fmeasure_stderr": 0.0020878980153854793, "rouge1_precision": 0.25514812406708015, "rouge1_precision_stderr": 0.004391496180067704, "rouge1_recall": 0.17001950752336809, "rouge1_recall_stderr": 0.002670040897065488, "rouge2_fmeasure": 0.03612213684240494, "rouge2_fmeasure_stderr": 0.0011871778135969023, "rouge2_precision": 0.07353758394638825, "rouge2_precision_stderr": 0.0030516615295547145, "rouge2_recall": 0.03669382694391697, "rouge2_recall_stderr": 0.0012757094070944904, "rougeL_fmeasure": 0.12615501327244175, "rougeL_fmeasure_stderr": 0.0016372299243071833, "rougeL_precision": 0.20823727124359004, "rougeL_precision_stderr": 0.003933686177209375, "rougeL_recall": 
0.13155240237460783, "rougeL_recall_stderr": 0.002051213056740241, "rougeLsum_fmeasure": 0.15204551717535292, "rougeLsum_fmeasure_stderr": 0.0019490519692789536, "rougeLsum_precision": 0.2415697060908809, "rougeLsum_precision_stderr": 0.004234537804910785, "rougeLsum_recall": 0.15998696145503252, "rougeLsum_recall_stderr": 0.0024973134235495914}}, "e2e_nlg_cleaned": {"generate_text_restaurant": {"bleu": 11.667822101646196, "bleu_stderr": 0.16435225231530254, "rouge1_fmeasure": 0.4522813670567056, "rouge1_fmeasure_stderr": 0.0023179656743458677, "rouge1_precision": 0.5369015967281372, "rouge1_precision_stderr": 0.003160704920883095, "rouge1_recall": 0.43015500314289395, "rouge1_recall_stderr": 0.0029641980031177677, "rouge2_fmeasure": 0.20989199851919094, "rouge2_fmeasure_stderr": 0.0020081610649898235, "rouge2_precision": 0.25208635210451685, "rouge2_precision_stderr": 0.0025699638882073126, "rouge2_recall": 0.19950887263978362, "rouge2_recall_stderr": 0.002160714259051888, "rougeL_fmeasure": 0.32777555984149576, "rougeL_fmeasure_stderr": 0.0020561131722717225, "rougeL_precision": 0.3915161586108937, "rougeL_precision_stderr": 0.002837201135072685, "rougeL_recall": 0.31091782219175945, "rougeL_recall_stderr": 0.002429509359228759, "rougeLsum_fmeasure": 0.3693672974349211, "rougeLsum_fmeasure_stderr": 0.002300533345597634, "rougeLsum_precision": 0.43989846755746614, "rougeLsum_precision_stderr": 0.0030882286677311596, "rougeLsum_recall": 0.3506965032863899, "rougeLsum_recall_stderr": 0.0027274058418149095}}, "gem_xsum": {"article_DOC_summary": {"bleu": 2.1787998041613217, "bleu_stderr": 0.07216311362915621, "rouge1_fmeasure": 0.21871796538916172, "rouge1_fmeasure_stderr": 0.003190710417901308, "rouge1_precision": 0.21292416056936542, "rouge1_precision_stderr": 0.003721805621375851, "rouge1_recall": 0.26407363786023624, "rouge1_recall_stderr": 0.004109965897647065, "rouge2_fmeasure": 0.04707689967671619, "rouge2_fmeasure_stderr": 0.0019586543182300385, 
"rouge2_precision": 0.0459430535044871, "rouge2_precision_stderr": 0.0020847588555945942, "rouge2_recall": 0.057194429918881434, "rouge2_recall_stderr": 0.0023823758287989474, "rougeL_fmeasure": 0.16392408526856198, "rougeL_fmeasure_stderr": 0.0024692316675247704, "rougeL_precision": 0.15958948191719216, "rougeL_precision_stderr": 0.0029061613514662667, "rougeL_recall": 0.19892295961191558, "rougeL_recall_stderr": 0.0032151265898104505, "rougeLsum_fmeasure": 0.16531795284794631, "rougeLsum_fmeasure_stderr": 0.002517788841816834, "rougeLsum_precision": 0.16065451543473253, "rougeLsum_precision_stderr": 0.0029222291764312643, "rougeLsum_recall": 0.20102060582232475, "rougeLsum_recall_stderr": 0.003333569558693489}}}
|
evaluation/2b855b14boscar/slim.limited=3000.model=2b855b14boscar.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:09.json
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": [
|
3 |
+
{
|
4 |
+
"task_name": "GEM/web_nlg_en",
|
5 |
+
"prompt_name": "PALM_prompt",
|
6 |
+
"bleu": 0.5267997188559879,
|
7 |
+
"dataset_path": "GEM/web_nlg",
|
8 |
+
"dataset_name": "en",
|
9 |
+
"subset": null,
|
10 |
+
"bleu_stderr": 0.026931356911439927
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"task_name": "GEM/web_nlg_en",
|
14 |
+
"prompt_name": "PALM_prompt",
|
15 |
+
"rouge1_precision": 0.12909089328831622,
|
16 |
+
"dataset_path": "GEM/web_nlg",
|
17 |
+
"dataset_name": "en",
|
18 |
+
"subset": null,
|
19 |
+
"rouge1_precision_stderr": 0.004066682179892196
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"task_name": "GEM/web_nlg_en",
|
23 |
+
"prompt_name": "PALM_prompt",
|
24 |
+
"rouge1_recall": 0.30569567033986794,
|
25 |
+
"dataset_path": "GEM/web_nlg",
|
26 |
+
"dataset_name": "en",
|
27 |
+
"subset": null,
|
28 |
+
"rouge1_recall_stderr": 0.0049742166850709245
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"task_name": "GEM/web_nlg_en",
|
32 |
+
"prompt_name": "PALM_prompt",
|
33 |
+
"rouge1_fmeasure": 0.14833814248028368,
|
34 |
+
"dataset_path": "GEM/web_nlg",
|
35 |
+
"dataset_name": "en",
|
36 |
+
"subset": null,
|
37 |
+
"rouge1_fmeasure_stderr": 0.0032956014234110094
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"task_name": "GEM/web_nlg_en",
|
41 |
+
"prompt_name": "PALM_prompt",
|
42 |
+
"rouge2_precision": 0.06263849235788414,
|
43 |
+
"dataset_path": "GEM/web_nlg",
|
44 |
+
"dataset_name": "en",
|
45 |
+
"subset": null,
|
46 |
+
"rouge2_precision_stderr": 0.0027648776425884146
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"task_name": "GEM/web_nlg_en",
|
50 |
+
"prompt_name": "PALM_prompt",
|
51 |
+
"rouge2_recall": 0.15171838675716612,
|
52 |
+
"dataset_path": "GEM/web_nlg",
|
53 |
+
"dataset_name": "en",
|
54 |
+
"subset": null,
|
55 |
+
"rouge2_recall_stderr": 0.003410889677834993
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"task_name": "GEM/web_nlg_en",
|
59 |
+
"prompt_name": "PALM_prompt",
|
60 |
+
"rouge2_fmeasure": 0.07091121401877826,
|
61 |
+
"dataset_path": "GEM/web_nlg",
|
62 |
+
"dataset_name": "en",
|
63 |
+
"subset": null,
|
64 |
+
"rouge2_fmeasure_stderr": 0.002122312057777099
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"task_name": "GEM/web_nlg_en",
|
68 |
+
"prompt_name": "PALM_prompt",
|
69 |
+
"rougeL_precision": 0.11668156581037376,
|
70 |
+
"dataset_path": "GEM/web_nlg",
|
71 |
+
"dataset_name": "en",
|
72 |
+
"subset": null,
|
73 |
+
"rougeL_precision_stderr": 0.003635933676657071
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"task_name": "GEM/web_nlg_en",
|
77 |
+
"prompt_name": "PALM_prompt",
|
78 |
+
"rougeL_recall": 0.2860888776540851,
|
79 |
+
"dataset_path": "GEM/web_nlg",
|
80 |
+
"dataset_name": "en",
|
81 |
+
"subset": null,
|
82 |
+
"rougeL_recall_stderr": 0.004611942130409851
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"task_name": "GEM/web_nlg_en",
|
86 |
+
"prompt_name": "PALM_prompt",
|
87 |
+
"rougeL_fmeasure": 0.13505528369086345,
|
88 |
+
"dataset_path": "GEM/web_nlg",
|
89 |
+
"dataset_name": "en",
|
90 |
+
"subset": null,
|
91 |
+
"rougeL_fmeasure_stderr": 0.0028479908820402133
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"task_name": "GEM/web_nlg_en",
|
95 |
+
"prompt_name": "PALM_prompt",
|
96 |
+
"rougeLsum_precision": 0.1195722732203613,
|
97 |
+
"dataset_path": "GEM/web_nlg",
|
98 |
+
"dataset_name": "en",
|
99 |
+
"subset": null,
|
100 |
+
"rougeLsum_precision_stderr": 0.0037541107010187424
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"task_name": "GEM/web_nlg_en",
|
104 |
+
"prompt_name": "PALM_prompt",
|
105 |
+
"rougeLsum_recall": 0.28914382725268084,
|
106 |
+
"dataset_path": "GEM/web_nlg",
|
107 |
+
"dataset_name": "en",
|
108 |
+
"subset": null,
|
109 |
+
"rougeLsum_recall_stderr": 0.0046481719922350055
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"task_name": "GEM/web_nlg_en",
|
113 |
+
"prompt_name": "PALM_prompt",
|
114 |
+
"rougeLsum_fmeasure": 0.1377662390260723,
|
115 |
+
"dataset_path": "GEM/web_nlg",
|
116 |
+
"dataset_name": "en",
|
117 |
+
"subset": null,
|
118 |
+
"rougeLsum_fmeasure_stderr": 0.0029377080382907077
|
119 |
+
}
|
120 |
+
],
|
121 |
+
"config": {
|
122 |
+
"model": "hf-causal",
|
123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscar-repetitions/2b855b14boscar/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
124 |
+
"task_args": "",
|
125 |
+
"num_fewshot": 1,
|
126 |
+
"batch_size": 16,
|
127 |
+
"device": "cuda",
|
128 |
+
"use_cache": false,
|
129 |
+
"limit": 3000,
|
130 |
+
"bootstrap_iters": 10,
|
131 |
+
"seed": 1234
|
132 |
+
}
|
133 |
+
}
|
evaluation/2b855b14boscar/slim.limited=3000.model=2b855b14boscar.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:09.json
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": [
|
3 |
+
{
|
4 |
+
"task_name": "GEM/wiki_lingua_en",
|
5 |
+
"prompt_name": "tldr_en",
|
6 |
+
"rouge1_precision": 0.25514812406708015,
|
7 |
+
"dataset_path": "GEM/wiki_lingua",
|
8 |
+
"dataset_name": "en",
|
9 |
+
"subset": null,
|
10 |
+
"rouge1_precision_stderr": 0.004391496180067704
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"task_name": "GEM/wiki_lingua_en",
|
14 |
+
"prompt_name": "tldr_en",
|
15 |
+
"rouge1_recall": 0.17001950752336809,
|
16 |
+
"dataset_path": "GEM/wiki_lingua",
|
17 |
+
"dataset_name": "en",
|
18 |
+
"subset": null,
|
19 |
+
"rouge1_recall_stderr": 0.002670040897065488
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"task_name": "GEM/wiki_lingua_en",
|
23 |
+
"prompt_name": "tldr_en",
|
24 |
+
"rouge1_fmeasure": 0.161637224440634,
|
25 |
+
"dataset_path": "GEM/wiki_lingua",
|
26 |
+
"dataset_name": "en",
|
27 |
+
"subset": null,
|
28 |
+
"rouge1_fmeasure_stderr": 0.0020878980153854793
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"task_name": "GEM/wiki_lingua_en",
|
32 |
+
"prompt_name": "tldr_en",
|
33 |
+
"rouge2_precision": 0.07353758394638825,
|
34 |
+
"dataset_path": "GEM/wiki_lingua",
|
35 |
+
"dataset_name": "en",
|
36 |
+
"subset": null,
|
37 |
+
"rouge2_precision_stderr": 0.0030516615295547145
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"task_name": "GEM/wiki_lingua_en",
|
41 |
+
"prompt_name": "tldr_en",
|
42 |
+
"rouge2_recall": 0.03669382694391697,
|
43 |
+
"dataset_path": "GEM/wiki_lingua",
|
44 |
+
"dataset_name": "en",
|
45 |
+
"subset": null,
|
46 |
+
"rouge2_recall_stderr": 0.0012757094070944904
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"task_name": "GEM/wiki_lingua_en",
|
50 |
+
"prompt_name": "tldr_en",
|
51 |
+
"rouge2_fmeasure": 0.03612213684240494,
|
52 |
+
"dataset_path": "GEM/wiki_lingua",
|
53 |
+
"dataset_name": "en",
|
54 |
+
"subset": null,
|
55 |
+
"rouge2_fmeasure_stderr": 0.0011871778135969023
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"task_name": "GEM/wiki_lingua_en",
|
59 |
+
"prompt_name": "tldr_en",
|
60 |
+
"rougeL_precision": 0.20823727124359004,
|
61 |
+
"dataset_path": "GEM/wiki_lingua",
|
62 |
+
"dataset_name": "en",
|
63 |
+
"subset": null,
|
64 |
+
"rougeL_precision_stderr": 0.003933686177209375
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"task_name": "GEM/wiki_lingua_en",
|
68 |
+
"prompt_name": "tldr_en",
|
69 |
+
"rougeL_recall": 0.13155240237460783,
|
70 |
+
"dataset_path": "GEM/wiki_lingua",
|
71 |
+
"dataset_name": "en",
|
72 |
+
"subset": null,
|
73 |
+
"rougeL_recall_stderr": 0.002051213056740241
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"task_name": "GEM/wiki_lingua_en",
|
77 |
+
"prompt_name": "tldr_en",
|
78 |
+
"rougeL_fmeasure": 0.12615501327244175,
|
79 |
+
"dataset_path": "GEM/wiki_lingua",
|
80 |
+
"dataset_name": "en",
|
81 |
+
"subset": null,
|
82 |
+
"rougeL_fmeasure_stderr": 0.0016372299243071833
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"task_name": "GEM/wiki_lingua_en",
|
86 |
+
"prompt_name": "tldr_en",
|
87 |
+
"rougeLsum_precision": 0.2415697060908809,
|
88 |
+
"dataset_path": "GEM/wiki_lingua",
|
89 |
+
"dataset_name": "en",
|
90 |
+
"subset": null,
|
91 |
+
"rougeLsum_precision_stderr": 0.004234537804910785
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"task_name": "GEM/wiki_lingua_en",
|
95 |
+
"prompt_name": "tldr_en",
|
96 |
+
"rougeLsum_recall": 0.15998696145503252,
|
97 |
+
"dataset_path": "GEM/wiki_lingua",
|
98 |
+
"dataset_name": "en",
|
99 |
+
"subset": null,
|
100 |
+
"rougeLsum_recall_stderr": 0.0024973134235495914
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"task_name": "GEM/wiki_lingua_en",
|
104 |
+
"prompt_name": "tldr_en",
|
105 |
+
"rougeLsum_fmeasure": 0.15204551717535292,
|
106 |
+
"dataset_path": "GEM/wiki_lingua",
|
107 |
+
"dataset_name": "en",
|
108 |
+
"subset": null,
|
109 |
+
"rougeLsum_fmeasure_stderr": 0.0019490519692789536
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"task_name": "GEM/wiki_lingua_en",
|
113 |
+
"prompt_name": "tldr_en",
|
114 |
+
"bleu": 1.8209281343094799,
|
115 |
+
"dataset_path": "GEM/wiki_lingua",
|
116 |
+
"dataset_name": "en",
|
117 |
+
"subset": null,
|
118 |
+
"bleu_stderr": 0.0674388661640857
|
119 |
+
}
|
120 |
+
],
|
121 |
+
"config": {
|
122 |
+
"model": "hf-causal",
|
123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscar-repetitions/2b855b14boscar/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
124 |
+
"task_args": "",
|
125 |
+
"num_fewshot": 1,
|
126 |
+
"batch_size": 16,
|
127 |
+
"device": "cuda",
|
128 |
+
"use_cache": false,
|
129 |
+
"limit": 3000,
|
130 |
+
"bootstrap_iters": 10,
|
131 |
+
"seed": 1234
|
132 |
+
}
|
133 |
+
}
|
evaluation/2b855b14boscar/slim.limited=3000.model=2b855b14boscar.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:09.json
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": [
|
3 |
+
{
|
4 |
+
"task_name": "e2e_nlg_cleaned",
|
5 |
+
"prompt_name": "generate_text_restaurant",
|
6 |
+
"bleu": 11.667822101646196,
|
7 |
+
"dataset_path": "e2e_nlg_cleaned",
|
8 |
+
"dataset_name": null,
|
9 |
+
"subset": null,
|
10 |
+
"bleu_stderr": 0.16435225231530254
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"task_name": "e2e_nlg_cleaned",
|
14 |
+
"prompt_name": "generate_text_restaurant",
|
15 |
+
"rouge1_precision": 0.5369015967281372,
|
16 |
+
"dataset_path": "e2e_nlg_cleaned",
|
17 |
+
"dataset_name": null,
|
18 |
+
"subset": null,
|
19 |
+
"rouge1_precision_stderr": 0.003160704920883095
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"task_name": "e2e_nlg_cleaned",
|
23 |
+
"prompt_name": "generate_text_restaurant",
|
24 |
+
"rouge1_recall": 0.43015500314289395,
|
25 |
+
"dataset_path": "e2e_nlg_cleaned",
|
26 |
+
"dataset_name": null,
|
27 |
+
"subset": null,
|
28 |
+
"rouge1_recall_stderr": 0.0029641980031177677
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"task_name": "e2e_nlg_cleaned",
|
32 |
+
"prompt_name": "generate_text_restaurant",
|
33 |
+
"rouge1_fmeasure": 0.4522813670567056,
|
34 |
+
"dataset_path": "e2e_nlg_cleaned",
|
35 |
+
"dataset_name": null,
|
36 |
+
"subset": null,
|
37 |
+
"rouge1_fmeasure_stderr": 0.0023179656743458677
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"task_name": "e2e_nlg_cleaned",
|
41 |
+
"prompt_name": "generate_text_restaurant",
|
42 |
+
"rouge2_precision": 0.25208635210451685,
|
43 |
+
"dataset_path": "e2e_nlg_cleaned",
|
44 |
+
"dataset_name": null,
|
45 |
+
"subset": null,
|
46 |
+
"rouge2_precision_stderr": 0.0025699638882073126
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"task_name": "e2e_nlg_cleaned",
|
50 |
+
"prompt_name": "generate_text_restaurant",
|
51 |
+
"rouge2_recall": 0.19950887263978362,
|
52 |
+
"dataset_path": "e2e_nlg_cleaned",
|
53 |
+
"dataset_name": null,
|
54 |
+
"subset": null,
|
55 |
+
"rouge2_recall_stderr": 0.002160714259051888
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"task_name": "e2e_nlg_cleaned",
|
59 |
+
"prompt_name": "generate_text_restaurant",
|
60 |
+
"rouge2_fmeasure": 0.20989199851919094,
|
61 |
+
"dataset_path": "e2e_nlg_cleaned",
|
62 |
+
"dataset_name": null,
|
63 |
+
"subset": null,
|
64 |
+
"rouge2_fmeasure_stderr": 0.0020081610649898235
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"task_name": "e2e_nlg_cleaned",
|
68 |
+
"prompt_name": "generate_text_restaurant",
|
69 |
+
"rougeL_precision": 0.3915161586108937,
|
70 |
+
"dataset_path": "e2e_nlg_cleaned",
|
71 |
+
"dataset_name": null,
|
72 |
+
"subset": null,
|
73 |
+
"rougeL_precision_stderr": 0.002837201135072685
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"task_name": "e2e_nlg_cleaned",
|
77 |
+
"prompt_name": "generate_text_restaurant",
|
78 |
+
"rougeL_recall": 0.31091782219175945,
|
79 |
+
"dataset_path": "e2e_nlg_cleaned",
|
80 |
+
"dataset_name": null,
|
81 |
+
"subset": null,
|
82 |
+
"rougeL_recall_stderr": 0.002429509359228759
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"task_name": "e2e_nlg_cleaned",
|
86 |
+
"prompt_name": "generate_text_restaurant",
|
87 |
+
"rougeL_fmeasure": 0.32777555984149576,
|
88 |
+
"dataset_path": "e2e_nlg_cleaned",
|
89 |
+
"dataset_name": null,
|
90 |
+
"subset": null,
|
91 |
+
"rougeL_fmeasure_stderr": 0.0020561131722717225
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"task_name": "e2e_nlg_cleaned",
|
95 |
+
"prompt_name": "generate_text_restaurant",
|
96 |
+
"rougeLsum_precision": 0.43989846755746614,
|
97 |
+
"dataset_path": "e2e_nlg_cleaned",
|
98 |
+
"dataset_name": null,
|
99 |
+
"subset": null,
|
100 |
+
"rougeLsum_precision_stderr": 0.0030882286677311596
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"task_name": "e2e_nlg_cleaned",
|
104 |
+
"prompt_name": "generate_text_restaurant",
|
105 |
+
"rougeLsum_recall": 0.3506965032863899,
|
106 |
+
"dataset_path": "e2e_nlg_cleaned",
|
107 |
+
"dataset_name": null,
|
108 |
+
"subset": null,
|
109 |
+
"rougeLsum_recall_stderr": 0.0027274058418149095
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"task_name": "e2e_nlg_cleaned",
|
113 |
+
"prompt_name": "generate_text_restaurant",
|
114 |
+
"rougeLsum_fmeasure": 0.3693672974349211,
|
115 |
+
"dataset_path": "e2e_nlg_cleaned",
|
116 |
+
"dataset_name": null,
|
117 |
+
"subset": null,
|
118 |
+
"rougeLsum_fmeasure_stderr": 0.002300533345597634
|
119 |
+
}
|
120 |
+
],
|
121 |
+
"config": {
|
122 |
+
"model": "hf-causal",
|
123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscar-repetitions/2b855b14boscar/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
124 |
+
"task_args": "",
|
125 |
+
"num_fewshot": 1,
|
126 |
+
"batch_size": 16,
|
127 |
+
"device": "cuda",
|
128 |
+
"use_cache": false,
|
129 |
+
"limit": 3000,
|
130 |
+
"bootstrap_iters": 10,
|
131 |
+
"seed": 1234
|
132 |
+
}
|
133 |
+
}
|
evaluation/2b855b14boscar/slim.limited=3000.model=2b855b14boscar.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:09.json
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": [
|
3 |
+
{
|
4 |
+
"task_name": "gem_xsum",
|
5 |
+
"prompt_name": "article_DOC_summary",
|
6 |
+
"rouge1_precision": 0.21292416056936542,
|
7 |
+
"dataset_path": "GEM/xsum",
|
8 |
+
"dataset_name": null,
|
9 |
+
"subset": "",
|
10 |
+
"rouge1_precision_stderr": 0.003721805621375851
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"task_name": "gem_xsum",
|
14 |
+
"prompt_name": "article_DOC_summary",
|
15 |
+
"rouge1_recall": 0.26407363786023624,
|
16 |
+
"dataset_path": "GEM/xsum",
|
17 |
+
"dataset_name": null,
|
18 |
+
"subset": "",
|
19 |
+
"rouge1_recall_stderr": 0.004109965897647065
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"task_name": "gem_xsum",
|
23 |
+
"prompt_name": "article_DOC_summary",
|
24 |
+
"rouge1_fmeasure": 0.21871796538916172,
|
25 |
+
"dataset_path": "GEM/xsum",
|
26 |
+
"dataset_name": null,
|
27 |
+
"subset": "",
|
28 |
+
"rouge1_fmeasure_stderr": 0.003190710417901308
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"task_name": "gem_xsum",
|
32 |
+
"prompt_name": "article_DOC_summary",
|
33 |
+
"rouge2_precision": 0.0459430535044871,
|
34 |
+
"dataset_path": "GEM/xsum",
|
35 |
+
"dataset_name": null,
|
36 |
+
"subset": "",
|
37 |
+
"rouge2_precision_stderr": 0.0020847588555945942
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"task_name": "gem_xsum",
|
41 |
+
"prompt_name": "article_DOC_summary",
|
42 |
+
"rouge2_recall": 0.057194429918881434,
|
43 |
+
"dataset_path": "GEM/xsum",
|
44 |
+
"dataset_name": null,
|
45 |
+
"subset": "",
|
46 |
+
"rouge2_recall_stderr": 0.0023823758287989474
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"task_name": "gem_xsum",
|
50 |
+
"prompt_name": "article_DOC_summary",
|
51 |
+
"rouge2_fmeasure": 0.04707689967671619,
|
52 |
+
"dataset_path": "GEM/xsum",
|
53 |
+
"dataset_name": null,
|
54 |
+
"subset": "",
|
55 |
+
"rouge2_fmeasure_stderr": 0.0019586543182300385
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"task_name": "gem_xsum",
|
59 |
+
"prompt_name": "article_DOC_summary",
|
60 |
+
"rougeL_precision": 0.15958948191719216,
|
61 |
+
"dataset_path": "GEM/xsum",
|
62 |
+
"dataset_name": null,
|
63 |
+
"subset": "",
|
64 |
+
"rougeL_precision_stderr": 0.0029061613514662667
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"task_name": "gem_xsum",
|
68 |
+
"prompt_name": "article_DOC_summary",
|
69 |
+
"rougeL_recall": 0.19892295961191558,
|
70 |
+
"dataset_path": "GEM/xsum",
|
71 |
+
"dataset_name": null,
|
72 |
+
"subset": "",
|
73 |
+
"rougeL_recall_stderr": 0.0032151265898104505
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"task_name": "gem_xsum",
|
77 |
+
"prompt_name": "article_DOC_summary",
|
78 |
+
"rougeL_fmeasure": 0.16392408526856198,
|
79 |
+
"dataset_path": "GEM/xsum",
|
80 |
+
"dataset_name": null,
|
81 |
+
"subset": "",
|
82 |
+
"rougeL_fmeasure_stderr": 0.0024692316675247704
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"task_name": "gem_xsum",
|
86 |
+
"prompt_name": "article_DOC_summary",
|
87 |
+
"rougeLsum_precision": 0.16065451543473253,
|
88 |
+
"dataset_path": "GEM/xsum",
|
89 |
+
"dataset_name": null,
|
90 |
+
"subset": "",
|
91 |
+
"rougeLsum_precision_stderr": 0.0029222291764312643
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"task_name": "gem_xsum",
|
95 |
+
"prompt_name": "article_DOC_summary",
|
96 |
+
"rougeLsum_recall": 0.20102060582232475,
|
97 |
+
"dataset_path": "GEM/xsum",
|
98 |
+
"dataset_name": null,
|
99 |
+
"subset": "",
|
100 |
+
"rougeLsum_recall_stderr": 0.003333569558693489
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"task_name": "gem_xsum",
|
104 |
+
"prompt_name": "article_DOC_summary",
|
105 |
+
"rougeLsum_fmeasure": 0.16531795284794631,
|
106 |
+
"dataset_path": "GEM/xsum",
|
107 |
+
"dataset_name": null,
|
108 |
+
"subset": "",
|
109 |
+
"rougeLsum_fmeasure_stderr": 0.002517788841816834
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"task_name": "gem_xsum",
|
113 |
+
"prompt_name": "article_DOC_summary",
|
114 |
+
"bleu": 2.1787998041613217,
|
115 |
+
"dataset_path": "GEM/xsum",
|
116 |
+
"dataset_name": null,
|
117 |
+
"subset": "",
|
118 |
+
"bleu_stderr": 0.07216311362915621
|
119 |
+
}
|
120 |
+
],
|
121 |
+
"config": {
|
122 |
+
"model": "hf-causal",
|
123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscar-repetitions/2b855b14boscar/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
124 |
+
"task_args": "",
|
125 |
+
"num_fewshot": 1,
|
126 |
+
"batch_size": 16,
|
127 |
+
"device": "cuda",
|
128 |
+
"use_cache": false,
|
129 |
+
"limit": 3000,
|
130 |
+
"bootstrap_iters": 10,
|
131 |
+
"seed": 1234
|
132 |
+
}
|
133 |
+
}
|
evaluation/2b855b18boscar/agg.limited=3000.model=2b855b18boscar.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:02.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.5436356416692493, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.02090067219456134}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.1228351349022992, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.00392578813475147}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3136012240707429, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005026673291848817}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.14944791586452452, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0034179743094259188}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.06036359127436263, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0026256930214199415}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.1540821651813354, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0034453725908332538}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.07234537308681589, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.002243522676458075}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.10986471002060917, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003424244924485874}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.2933094278447532, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0046414637659723335}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.13546212027219454, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0029381048148479504}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.11224637006619154, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0034996478870552026}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.2955445013343594, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004644469281902261}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.1379443115608104, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0030157871158242528}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscar-repetitions/2b855b18boscar/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/2b855b18boscar/agg.limited=3000.model=2b855b18boscar.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:02.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.18629767467864103, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003355430558115201}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.16645730853569463, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0026048912262074822}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.1453399751111179, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.00196879821441486}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.04001176794354725, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0020377906175165516}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.029825045227337654, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR 
in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0011666525886938686}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.02614621079237287, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009675998111133414}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.1481511033911748, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002894489102534062}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.12866623651413392, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.001979198149413099}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.11235464513573835, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.00148075522521234}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.17611517735021182, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": 
"en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0031985796393621735}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.15661935795399035, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002417653548807446}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.13684314528525174, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001822606254942644}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.7359046745646356, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08655725250035416}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscar-repetitions/2b855b18boscar/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/2b855b18boscar/agg.limited=3000.model=2b855b18boscar.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:02.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 11.166672084026482, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.12938408324856954}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5392369439533705, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003158080318819542}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.425544836011795, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0030086083040162534}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.4507990191031002, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0023579402575316694}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.24923987871776376, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002522668031161101}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.1949319860323451, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0021372524644967545}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.20627425694143425, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0019815596351453494}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.3897106480194717, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0027884636517831116}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.30446876260154965, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002389473665831708}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.32349814931755977, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002012282170494305}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.4377783545729113, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003032707862826963}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.3442594533210036, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002717350593353686}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.36512835271987504, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0022891431188154254}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscar-repetitions/2b855b18boscar/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/2b855b18boscar/agg.limited=3000.model=2b855b18boscar.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:02.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.16157697077062325, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002776842105299102}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.30100429264695533, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004264479846806996}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.19714595265287357, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002646855973049805}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.03308738040114966, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0014221484334913485}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.06350009974686657, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| 
{{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00247767837714121}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.04048278505890417, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0015333371330976875}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.12299919811859228, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002086499912033289}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.2315691172099023, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0033296319052555676}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.1505795642520338, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0019961982480274513}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.1256692232746594, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": 
"a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0021343388989720014}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.2389561587505367, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003685767784931331}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.1544789785933326, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0021325096133157765}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.692738611401789, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08350543592873425}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscar-repetitions/2b855b18boscar/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/2b855b18boscar/examples.limited=3000.model=2b855b18boscar.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:02.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
|
|
evaluation/2b855b18boscar/examples.limited=3000.model=2b855b18boscar.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:02.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71d4549493ccf150e7fdd925f40f97a4ad633e422333ad0839b5d6c6db8513eb
|
3 |
+
size 13018690
|
evaluation/2b855b18boscar/examples.limited=3000.model=2b855b18boscar.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:02.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
|
|
evaluation/2b855b18boscar/examples.limited=3000.model=2b855b18boscar.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:02.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
|
|
evaluation/2b855b18boscar/merged.csv
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
dataset,prompt,metric,value
|
2 |
+
e2e_nlg_cleaned,generate_text_restaurant,rouge2_fmeasure,0.20627425694143425
|
3 |
+
gem_xsum,article_DOC_summary,rouge2_fmeasure,0.04048278505890417
|
4 |
+
web_nlg_en,PALM_prompt,rouge2_fmeasure,0.07234537308681589
|
5 |
+
wiki_lingua_en,tldr_en,rouge2_fmeasure,0.02614621079237287
|
evaluation/2b855b18boscar/merged.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"GEM/web_nlg_en": {"PALM_prompt": {"bleu": 0.5436356416692493, "bleu_stderr": 0.02090067219456134, "rouge1_fmeasure": 0.14944791586452452, "rouge1_fmeasure_stderr": 0.0034179743094259188, "rouge1_precision": 0.1228351349022992, "rouge1_precision_stderr": 0.00392578813475147, "rouge1_recall": 0.3136012240707429, "rouge1_recall_stderr": 0.005026673291848817, "rouge2_fmeasure": 0.07234537308681589, "rouge2_fmeasure_stderr": 0.002243522676458075, "rouge2_precision": 0.06036359127436263, "rouge2_precision_stderr": 0.0026256930214199415, "rouge2_recall": 0.1540821651813354, "rouge2_recall_stderr": 0.0034453725908332538, "rougeL_fmeasure": 0.13546212027219454, "rougeL_fmeasure_stderr": 0.0029381048148479504, "rougeL_precision": 0.10986471002060917, "rougeL_precision_stderr": 0.003424244924485874, "rougeL_recall": 0.2933094278447532, "rougeL_recall_stderr": 0.0046414637659723335, "rougeLsum_fmeasure": 0.1379443115608104, "rougeLsum_fmeasure_stderr": 0.0030157871158242528, "rougeLsum_precision": 0.11224637006619154, "rougeLsum_precision_stderr": 0.0034996478870552026, "rougeLsum_recall": 0.2955445013343594, "rougeLsum_recall_stderr": 0.004644469281902261}}, "GEM/wiki_lingua_en": {"tldr_en": {"bleu": 1.7359046745646356, "bleu_stderr": 0.08655725250035416, "rouge1_fmeasure": 0.1453399751111179, "rouge1_fmeasure_stderr": 0.00196879821441486, "rouge1_precision": 0.18629767467864103, "rouge1_precision_stderr": 0.003355430558115201, "rouge1_recall": 0.16645730853569463, "rouge1_recall_stderr": 0.0026048912262074822, "rouge2_fmeasure": 0.02614621079237287, "rouge2_fmeasure_stderr": 0.0009675998111133414, "rouge2_precision": 0.04001176794354725, "rouge2_precision_stderr": 0.0020377906175165516, "rouge2_recall": 0.029825045227337654, "rouge2_recall_stderr": 0.0011666525886938686, "rougeL_fmeasure": 0.11235464513573835, "rougeL_fmeasure_stderr": 0.00148075522521234, "rougeL_precision": 0.1481511033911748, "rougeL_precision_stderr": 0.002894489102534062, "rougeL_recall": 
0.12866623651413392, "rougeL_recall_stderr": 0.001979198149413099, "rougeLsum_fmeasure": 0.13684314528525174, "rougeLsum_fmeasure_stderr": 0.001822606254942644, "rougeLsum_precision": 0.17611517735021182, "rougeLsum_precision_stderr": 0.0031985796393621735, "rougeLsum_recall": 0.15661935795399035, "rougeLsum_recall_stderr": 0.002417653548807446}}, "e2e_nlg_cleaned": {"generate_text_restaurant": {"bleu": 11.166672084026482, "bleu_stderr": 0.12938408324856954, "rouge1_fmeasure": 0.4507990191031002, "rouge1_fmeasure_stderr": 0.0023579402575316694, "rouge1_precision": 0.5392369439533705, "rouge1_precision_stderr": 0.003158080318819542, "rouge1_recall": 0.425544836011795, "rouge1_recall_stderr": 0.0030086083040162534, "rouge2_fmeasure": 0.20627425694143425, "rouge2_fmeasure_stderr": 0.0019815596351453494, "rouge2_precision": 0.24923987871776376, "rouge2_precision_stderr": 0.002522668031161101, "rouge2_recall": 0.1949319860323451, "rouge2_recall_stderr": 0.0021372524644967545, "rougeL_fmeasure": 0.32349814931755977, "rougeL_fmeasure_stderr": 0.002012282170494305, "rougeL_precision": 0.3897106480194717, "rougeL_precision_stderr": 0.0027884636517831116, "rougeL_recall": 0.30446876260154965, "rougeL_recall_stderr": 0.002389473665831708, "rougeLsum_fmeasure": 0.36512835271987504, "rougeLsum_fmeasure_stderr": 0.0022891431188154254, "rougeLsum_precision": 0.4377783545729113, "rougeLsum_precision_stderr": 0.003032707862826963, "rougeLsum_recall": 0.3442594533210036, "rougeLsum_recall_stderr": 0.002717350593353686}}, "gem_xsum": {"article_DOC_summary": {"bleu": 1.692738611401789, "bleu_stderr": 0.08350543592873425, "rouge1_fmeasure": 0.19714595265287357, "rouge1_fmeasure_stderr": 0.002646855973049805, "rouge1_precision": 0.16157697077062325, "rouge1_precision_stderr": 0.002776842105299102, "rouge1_recall": 0.30100429264695533, "rouge1_recall_stderr": 0.004264479846806996, "rouge2_fmeasure": 0.04048278505890417, "rouge2_fmeasure_stderr": 0.0015333371330976875, "rouge2_precision": 
0.03308738040114966, "rouge2_precision_stderr": 0.0014221484334913485, "rouge2_recall": 0.06350009974686657, "rouge2_recall_stderr": 0.00247767837714121, "rougeL_fmeasure": 0.1505795642520338, "rougeL_fmeasure_stderr": 0.0019961982480274513, "rougeL_precision": 0.12299919811859228, "rougeL_precision_stderr": 0.002086499912033289, "rougeL_recall": 0.2315691172099023, "rougeL_recall_stderr": 0.0033296319052555676, "rougeLsum_fmeasure": 0.1544789785933326, "rougeLsum_fmeasure_stderr": 0.0021325096133157765, "rougeLsum_precision": 0.1256692232746594, "rougeLsum_precision_stderr": 0.0021343388989720014, "rougeLsum_recall": 0.2389561587505367, "rougeLsum_recall_stderr": 0.003685767784931331}}}
|
evaluation/2b855b18boscar/slim.limited=3000.model=2b855b18boscar.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2022-12-28T13:06:02.json
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": [
|
3 |
+
{
|
4 |
+
"task_name": "GEM/web_nlg_en",
|
5 |
+
"prompt_name": "PALM_prompt",
|
6 |
+
"bleu": 0.5436356416692493,
|
7 |
+
"dataset_path": "GEM/web_nlg",
|
8 |
+
"dataset_name": "en",
|
9 |
+
"subset": null,
|
10 |
+
"bleu_stderr": 0.02090067219456134
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"task_name": "GEM/web_nlg_en",
|
14 |
+
"prompt_name": "PALM_prompt",
|
15 |
+
"rouge1_precision": 0.1228351349022992,
|
16 |
+
"dataset_path": "GEM/web_nlg",
|
17 |
+
"dataset_name": "en",
|
18 |
+
"subset": null,
|
19 |
+
"rouge1_precision_stderr": 0.00392578813475147
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"task_name": "GEM/web_nlg_en",
|
23 |
+
"prompt_name": "PALM_prompt",
|
24 |
+
"rouge1_recall": 0.3136012240707429,
|
25 |
+
"dataset_path": "GEM/web_nlg",
|
26 |
+
"dataset_name": "en",
|
27 |
+
"subset": null,
|
28 |
+
"rouge1_recall_stderr": 0.005026673291848817
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"task_name": "GEM/web_nlg_en",
|
32 |
+
"prompt_name": "PALM_prompt",
|
33 |
+
"rouge1_fmeasure": 0.14944791586452452,
|
34 |
+
"dataset_path": "GEM/web_nlg",
|
35 |
+
"dataset_name": "en",
|
36 |
+
"subset": null,
|
37 |
+
"rouge1_fmeasure_stderr": 0.0034179743094259188
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"task_name": "GEM/web_nlg_en",
|
41 |
+
"prompt_name": "PALM_prompt",
|
42 |
+
"rouge2_precision": 0.06036359127436263,
|
43 |
+
"dataset_path": "GEM/web_nlg",
|
44 |
+
"dataset_name": "en",
|
45 |
+
"subset": null,
|
46 |
+
"rouge2_precision_stderr": 0.0026256930214199415
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"task_name": "GEM/web_nlg_en",
|
50 |
+
"prompt_name": "PALM_prompt",
|
51 |
+
"rouge2_recall": 0.1540821651813354,
|
52 |
+
"dataset_path": "GEM/web_nlg",
|
53 |
+
"dataset_name": "en",
|
54 |
+
"subset": null,
|
55 |
+
"rouge2_recall_stderr": 0.0034453725908332538
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"task_name": "GEM/web_nlg_en",
|
59 |
+
"prompt_name": "PALM_prompt",
|
60 |
+
"rouge2_fmeasure": 0.07234537308681589,
|
61 |
+
"dataset_path": "GEM/web_nlg",
|
62 |
+
"dataset_name": "en",
|
63 |
+
"subset": null,
|
64 |
+
"rouge2_fmeasure_stderr": 0.002243522676458075
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"task_name": "GEM/web_nlg_en",
|
68 |
+
"prompt_name": "PALM_prompt",
|
69 |
+
"rougeL_precision": 0.10986471002060917,
|
70 |
+
"dataset_path": "GEM/web_nlg",
|
71 |
+
"dataset_name": "en",
|
72 |
+
"subset": null,
|
73 |
+
"rougeL_precision_stderr": 0.003424244924485874
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"task_name": "GEM/web_nlg_en",
|
77 |
+
"prompt_name": "PALM_prompt",
|
78 |
+
"rougeL_recall": 0.2933094278447532,
|
79 |
+
"dataset_path": "GEM/web_nlg",
|
80 |
+
"dataset_name": "en",
|
81 |
+
"subset": null,
|
82 |
+
"rougeL_recall_stderr": 0.0046414637659723335
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"task_name": "GEM/web_nlg_en",
|
86 |
+
"prompt_name": "PALM_prompt",
|
87 |
+
"rougeL_fmeasure": 0.13546212027219454,
|
88 |
+
"dataset_path": "GEM/web_nlg",
|
89 |
+
"dataset_name": "en",
|
90 |
+
"subset": null,
|
91 |
+
"rougeL_fmeasure_stderr": 0.0029381048148479504
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"task_name": "GEM/web_nlg_en",
|
95 |
+
"prompt_name": "PALM_prompt",
|
96 |
+
"rougeLsum_precision": 0.11224637006619154,
|
97 |
+
"dataset_path": "GEM/web_nlg",
|
98 |
+
"dataset_name": "en",
|
99 |
+
"subset": null,
|
100 |
+
"rougeLsum_precision_stderr": 0.0034996478870552026
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"task_name": "GEM/web_nlg_en",
|
104 |
+
"prompt_name": "PALM_prompt",
|
105 |
+
"rougeLsum_recall": 0.2955445013343594,
|
106 |
+
"dataset_path": "GEM/web_nlg",
|
107 |
+
"dataset_name": "en",
|
108 |
+
"subset": null,
|
109 |
+
"rougeLsum_recall_stderr": 0.004644469281902261
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"task_name": "GEM/web_nlg_en",
|
113 |
+
"prompt_name": "PALM_prompt",
|
114 |
+
"rougeLsum_fmeasure": 0.1379443115608104,
|
115 |
+
"dataset_path": "GEM/web_nlg",
|
116 |
+
"dataset_name": "en",
|
117 |
+
"subset": null,
|
118 |
+
"rougeLsum_fmeasure_stderr": 0.0030157871158242528
|
119 |
+
}
|
120 |
+
],
|
121 |
+
"config": {
|
122 |
+
"model": "hf-causal",
|
123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscar-repetitions/2b855b18boscar/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
124 |
+
"task_args": "",
|
125 |
+
"num_fewshot": 1,
|
126 |
+
"batch_size": 16,
|
127 |
+
"device": "cuda",
|
128 |
+
"use_cache": false,
|
129 |
+
"limit": 3000,
|
130 |
+
"bootstrap_iters": 10,
|
131 |
+
"seed": 1234
|
132 |
+
}
|
133 |
+
}
|