Update metadata
#4
by
iarcuschin
- opened
- benchmark_cases_metadata.csv +19 -19
- benchmark_cases_metadata.parquet +2 -2
- benchmark_metadata.json +84 -18
- benchmark_metadata_croissant.json +60 -0
benchmark_cases_metadata.csv
CHANGED
@@ -1,19 +1,19 @@
|
|
1 |
-
case_id,url,task_description,max_seq_len,min_seq_len,training_args.atol,training_args.lr,training_args.use_single_loss,training_args.iit_weight,training_args.behavior_weight,training_args.strict_weight,training_args.epochs,training_args.act_fn,training_args.clip_grad_norm,training_args.lr_scheduler,transformer_cfg.n_layers,transformer_cfg.d_model,transformer_cfg.n_ctx,transformer_cfg.d_head,transformer_cfg.model_name,transformer_cfg.n_heads,transformer_cfg.d_mlp,transformer_cfg.act_fn,transformer_cfg.d_vocab,transformer_cfg.eps,transformer_cfg.use_attn_result,transformer_cfg.use_attn_scale,transformer_cfg.use_split_qkv_input,transformer_cfg.use_hook_mlp_in,transformer_cfg.use_attn_in,transformer_cfg.use_local_attn,transformer_cfg.original_architecture,transformer_cfg.from_checkpoint,transformer_cfg.checkpoint_index,transformer_cfg.checkpoint_label_type,transformer_cfg.checkpoint_value,transformer_cfg.tokenizer_name,transformer_cfg.window_size,transformer_cfg.attn_types,transformer_cfg.init_mode,transformer_cfg.normalization_type,transformer_cfg.device,transformer_cfg.n_devices,transformer_cfg.attention_dir,transformer_cfg.attn_only,transformer_cfg.seed,transformer_cfg.initializer_range,transformer_cfg.init_weights,transformer_cfg.scale_attn_by_inverse_layer_idx,transformer_cfg.positional_embedding_type,transformer_cfg.final_rms,transformer_cfg.d_vocab_out,transformer_cfg.parallel_attn_mlp,transformer_cfg.rotary_dim,transformer_cfg.n_params,transformer_cfg.use_hook_tokens,transformer_cfg.gated_mlp,transformer_cfg.default_prepend_bos,transformer_cfg.dtype,transformer_cfg.tokenizer_prepends_bos,transformer_cfg.n_key_value_heads,transformer_cfg.post_embedding_ln,transformer_cfg.rotary_base,transformer_cfg.trust_remote_code,transformer_cfg.rotary_adjacent_pairs
|
2 |
-
11,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/11,Counts the number of words in a sequence based on their length.,10,4,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,12.0,10.0,3.0,custom,4.0,48.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1460593486680443,True,False,standard,False,5.0,False,,3456.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
3 |
-
13,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/13,"Analyzes the trend (increasing, decreasing, constant) of numeric tokens.",10,4,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,bidirectional,False,0.0,0.1460593486680443,True,False,standard,False,3.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
4 |
-
18,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/18,"Classify each token based on its frequency as 'rare', 'common', or 'frequent'.",10,4,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,12.0,10.0,3.0,custom,4.0,48.0,gelu,7.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,bidirectional,False,0.0,0.12344267996967354,True,False,standard,False,3.0,False,,3456.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
5 |
-
19,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/19,Removes consecutive duplicate tokens from a sequence.,15,4,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,32.0,15.0,8.0,custom,4.0,128.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.15689290811054724,True,False,standard,False,3.0,False,,24576.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
6 |
-
20,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/20,Detect spam messages based on appearance of spam keywords.,10,4,0.05,0.001,False,1.0,1.0,1.0,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,14.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cuda,1.0,causal,False,0.0,0.16,True,False,standard,False,2.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
7 |
-
21,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/21,Extract unique tokens from a string,10,4,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1885618083164127,True,False,standard,False,3.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
8 |
-
24,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/24,Identifies the first occurrence of each token in a sequence.,10,4,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1885618083164127,True,False,standard,False,3.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
9 |
-
3,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/3,Returns the fraction of 'x' in the input up to the i-th position for all i.,5,4,0.05,0.001,False,1.0,1.0,10.0,2000.0,gelu,0.1,,2.0,12.0,5.0,3.0,custom,4.0,48.0,gelu,6.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.22188007849009167,True,False,standard,False,1.0,False,,3456.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
10 |
-
33,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/33,Checks if each token's length is odd or even.,10,4,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.17457431218879393,True,False,standard,False,2.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
11 |
-
34,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/34,Calculate the ratio of vowels to consonants in each word.,10,4,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.16329931618554522,True,False,standard,False,5.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
12 |
-
35,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/35,Alternates capitalization of each character in words.,10,4,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1539600717839002,True,False,standard,False,8.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
13 |
-
36,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/36,"Classifies each token as 'positive', 'negative', or 'neutral' based on emojis.",10,4,0.05,0.001,False,1.0,1.0,10.0,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cuda,1.0,causal,False,0.0,0.19402850002906638,True,False,standard,False,3.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
14 |
-
37,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/37,Reverses each word in the sequence except for specified exclusions.,10,4,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1539600717839002,True,False,standard,False,8.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
15 |
-
38,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/38,Checks if tokens alternate between two types.,10,4,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1539600717839002,True,False,standard,False,2.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
16 |
-
4,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/4,Return fraction of previous open tokens minus the fraction of close tokens.,10,4,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,7.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.17056057308448835,True,False,standard,False,1.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
17 |
-
8,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/8,Fills gaps between tokens with a specified filler.,10,4,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.13333333333333333,True,False,standard,False,8.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
18 |
-
ioi,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi,Indirect object identification,16,16,,,True,,,,,,,,,,,,,,,,,,True,True,True,True,True,True,,True,,,,,,,,,,,,True,,,True,True,,True,,True,,,True,True,True,,,,True,,True,True
|
19 |
-
ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token,Indirect object identification,16,16,,,True,,,,,,,,,,,,,,,,,,True,True,True,True,True,True,,True,,,,,,,,,,,,True,,,True,True,,True,,True,,,True,True,True,,,,True,,True,True
|
|
|
1 |
+
case_id,url,task_description,max_seq_len,min_seq_len,transformer_cfg_file_url,training_args_file_url,weights_file_url,circuit_file_url,training_args.atol,training_args.lr,training_args.use_single_loss,training_args.iit_weight,training_args.behavior_weight,training_args.strict_weight,training_args.epochs,training_args.act_fn,training_args.clip_grad_norm,training_args.lr_scheduler,transformer_cfg.n_layers,transformer_cfg.d_model,transformer_cfg.n_ctx,transformer_cfg.d_head,transformer_cfg.model_name,transformer_cfg.n_heads,transformer_cfg.d_mlp,transformer_cfg.act_fn,transformer_cfg.d_vocab,transformer_cfg.eps,transformer_cfg.use_attn_result,transformer_cfg.use_attn_scale,transformer_cfg.use_split_qkv_input,transformer_cfg.use_hook_mlp_in,transformer_cfg.use_attn_in,transformer_cfg.use_local_attn,transformer_cfg.original_architecture,transformer_cfg.from_checkpoint,transformer_cfg.checkpoint_index,transformer_cfg.checkpoint_label_type,transformer_cfg.checkpoint_value,transformer_cfg.tokenizer_name,transformer_cfg.window_size,transformer_cfg.attn_types,transformer_cfg.init_mode,transformer_cfg.normalization_type,transformer_cfg.device,transformer_cfg.n_devices,transformer_cfg.attention_dir,transformer_cfg.attn_only,transformer_cfg.seed,transformer_cfg.initializer_range,transformer_cfg.init_weights,transformer_cfg.scale_attn_by_inverse_layer_idx,transformer_cfg.positional_embedding_type,transformer_cfg.final_rms,transformer_cfg.d_vocab_out,transformer_cfg.parallel_attn_mlp,transformer_cfg.rotary_dim,transformer_cfg.n_params,transformer_cfg.use_hook_tokens,transformer_cfg.gated_mlp,transformer_cfg.default_prepend_bos,transformer_cfg.dtype,transformer_cfg.tokenizer_prepends_bos,transformer_cfg.n_key_value_heads,transformer_cfg.post_embedding_ln,transformer_cfg.rotary_base,transformer_cfg.trust_remote_code,transformer_cfg.rotary_adjacent_pairs
|
2 |
+
11,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/11,Counts the number of words in a sequence based on their length.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,12.0,10.0,3.0,custom,4.0,48.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1460593486680443,True,False,standard,False,5.0,False,,3456.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
3 |
+
13,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/13,"Analyzes the trend (increasing, decreasing, constant) of numeric tokens.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,bidirectional,False,0.0,0.1460593486680443,True,False,standard,False,3.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
4 |
+
18,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/18,"Classify each token based on its frequency as 'rare', 'common', or 'frequent'.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,12.0,10.0,3.0,custom,4.0,48.0,gelu,7.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,bidirectional,False,0.0,0.12344267996967354,True,False,standard,False,3.0,False,,3456.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
5 |
+
19,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/19,Removes consecutive duplicate tokens from a sequence.,15,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,32.0,15.0,8.0,custom,4.0,128.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.15689290811054724,True,False,standard,False,3.0,False,,24576.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
6 |
+
20,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/20,Detect spam messages based on appearance of spam keywords.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg_1110.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta_1110.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_1110.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl,0.05,0.001,False,1.0,1.0,1.0,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,14.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cuda,1.0,causal,False,0.0,0.16,True,False,standard,False,2.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
7 |
+
21,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/21,Extract unique tokens from a string,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1885618083164127,True,False,standard,False,3.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
8 |
+
24,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/24,Identifies the first occurrence of each token in a sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1885618083164127,True,False,standard,False,3.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
9 |
+
3,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/3,Returns the fraction of 'x' in the input up to the i-th position for all i.,5,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg_10110.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta_10110.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_10110.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl,0.05,0.001,False,1.0,1.0,10.0,2000.0,gelu,0.1,,2.0,12.0,5.0,3.0,custom,4.0,48.0,gelu,6.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.22188007849009167,True,False,standard,False,1.0,False,,3456.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
10 |
+
33,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/33,Checks if each token's length is odd or even.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.17457431218879393,True,False,standard,False,2.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
11 |
+
34,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/34,Calculate the ratio of vowels to consonants in each word.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.16329931618554522,True,False,standard,False,5.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
12 |
+
35,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/35,Alternates capitalization of each character in words.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1539600717839002,True,False,standard,False,8.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
13 |
+
36,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/36,"Classifies each token as 'positive', 'negative', or 'neutral' based on emojis.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg_10110.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta_10110.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_10110.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl,0.05,0.001,False,1.0,1.0,10.0,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cuda,1.0,causal,False,0.0,0.19402850002906638,True,False,standard,False,3.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
14 |
+
37,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/37,Reverses each word in the sequence except for specified exclusions.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1539600717839002,True,False,standard,False,8.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
15 |
+
38,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/38,Checks if tokens alternate between two types.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1539600717839002,True,False,standard,False,2.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
16 |
+
4,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/4,Return fraction of previous open tokens minus the fraction of close tokens.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,7.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.17056057308448835,True,False,standard,False,1.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
17 |
+
8,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/8,Fills gaps between tokens with a specified filler.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.13333333333333333,True,False,standard,False,8.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
18 |
+
ioi,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi,Indirect object identification,16,16,,,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_100_100_40.pth,,,,True,,,,,,,,,,,,,,,,,,True,True,True,True,True,True,,True,,,,,,,,,,,,True,,,True,True,,True,,True,,,True,True,True,,,,True,,True,True
|
19 |
+
ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token,Indirect object identification,16,16,,,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_100_100_40.pth,,,,True,,,,,,,,,,,,,,,,,,True,True,True,True,True,True,,True,,,,,,,,,,,,True,,,True,True,,True,,True,,,True,True,True,,,,True,,True,True
|
benchmark_cases_metadata.parquet
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d42203edfeb52102b4df24aecf54b5a51c9c4f547b6cede024422c898564f69f
|
3 |
+
size 56701
|
benchmark_metadata.json
CHANGED
@@ -91,6 +91,7 @@
|
|
91 |
"trust_remote_code": false,
|
92 |
"rotary_adjacent_pairs": false
|
93 |
},
|
|
|
94 |
"training_args": {
|
95 |
"atol": 0.05,
|
96 |
"lr": 0.01,
|
@@ -102,7 +103,10 @@
|
|
102 |
"act_fn": "gelu",
|
103 |
"clip_grad_norm": 1.0,
|
104 |
"lr_scheduler": ""
|
105 |
-
}
|
|
|
|
|
|
|
106 |
},
|
107 |
{
|
108 |
"case_id": "13",
|
@@ -185,6 +189,7 @@
|
|
185 |
"trust_remote_code": false,
|
186 |
"rotary_adjacent_pairs": false
|
187 |
},
|
|
|
188 |
"training_args": {
|
189 |
"atol": 0.05,
|
190 |
"lr": 0.01,
|
@@ -196,7 +201,10 @@
|
|
196 |
"act_fn": "gelu",
|
197 |
"clip_grad_norm": 1.0,
|
198 |
"lr_scheduler": ""
|
199 |
-
}
|
|
|
|
|
|
|
200 |
},
|
201 |
{
|
202 |
"case_id": "18",
|
@@ -281,6 +289,7 @@
|
|
281 |
"trust_remote_code": false,
|
282 |
"rotary_adjacent_pairs": false
|
283 |
},
|
|
|
284 |
"training_args": {
|
285 |
"atol": 0.05,
|
286 |
"lr": 0.001,
|
@@ -292,7 +301,10 @@
|
|
292 |
"act_fn": "gelu",
|
293 |
"clip_grad_norm": 0.1,
|
294 |
"lr_scheduler": ""
|
295 |
-
}
|
|
|
|
|
|
|
296 |
},
|
297 |
{
|
298 |
"case_id": "19",
|
@@ -375,6 +387,7 @@
|
|
375 |
"trust_remote_code": false,
|
376 |
"rotary_adjacent_pairs": false
|
377 |
},
|
|
|
378 |
"training_args": {
|
379 |
"atol": 0.05,
|
380 |
"lr": 0.001,
|
@@ -386,7 +399,10 @@
|
|
386 |
"act_fn": "gelu",
|
387 |
"clip_grad_norm": 0.1,
|
388 |
"lr_scheduler": ""
|
389 |
-
}
|
|
|
|
|
|
|
390 |
},
|
391 |
{
|
392 |
"case_id": "20",
|
@@ -478,6 +494,7 @@
|
|
478 |
"trust_remote_code": false,
|
479 |
"rotary_adjacent_pairs": false
|
480 |
},
|
|
|
481 |
"training_args": {
|
482 |
"atol": 0.05,
|
483 |
"lr": 0.001,
|
@@ -489,7 +506,10 @@
|
|
489 |
"act_fn": "gelu",
|
490 |
"clip_grad_norm": 0.1,
|
491 |
"lr_scheduler": ""
|
492 |
-
}
|
|
|
|
|
|
|
493 |
},
|
494 |
{
|
495 |
"case_id": "21",
|
@@ -572,6 +592,7 @@
|
|
572 |
"trust_remote_code": false,
|
573 |
"rotary_adjacent_pairs": false
|
574 |
},
|
|
|
575 |
"training_args": {
|
576 |
"atol": 0.05,
|
577 |
"lr": 0.01,
|
@@ -583,7 +604,10 @@
|
|
583 |
"act_fn": "gelu",
|
584 |
"clip_grad_norm": 1.0,
|
585 |
"lr_scheduler": ""
|
586 |
-
}
|
|
|
|
|
|
|
587 |
},
|
588 |
{
|
589 |
"case_id": "24",
|
@@ -666,6 +690,7 @@
|
|
666 |
"trust_remote_code": false,
|
667 |
"rotary_adjacent_pairs": false
|
668 |
},
|
|
|
669 |
"training_args": {
|
670 |
"atol": 0.05,
|
671 |
"lr": 0.01,
|
@@ -677,7 +702,10 @@
|
|
677 |
"act_fn": "gelu",
|
678 |
"clip_grad_norm": 1.0,
|
679 |
"lr_scheduler": ""
|
680 |
-
}
|
|
|
|
|
|
|
681 |
},
|
682 |
{
|
683 |
"case_id": "3",
|
@@ -761,6 +789,7 @@
|
|
761 |
"trust_remote_code": false,
|
762 |
"rotary_adjacent_pairs": false
|
763 |
},
|
|
|
764 |
"training_args": {
|
765 |
"atol": 0.05,
|
766 |
"lr": 0.001,
|
@@ -772,7 +801,10 @@
|
|
772 |
"act_fn": "gelu",
|
773 |
"clip_grad_norm": 0.1,
|
774 |
"lr_scheduler": ""
|
775 |
-
}
|
|
|
|
|
|
|
776 |
},
|
777 |
{
|
778 |
"case_id": "33",
|
@@ -860,6 +892,7 @@
|
|
860 |
"trust_remote_code": false,
|
861 |
"rotary_adjacent_pairs": false
|
862 |
},
|
|
|
863 |
"training_args": {
|
864 |
"atol": 0.05,
|
865 |
"lr": 0.001,
|
@@ -871,7 +904,10 @@
|
|
871 |
"act_fn": "gelu",
|
872 |
"clip_grad_norm": 0.1,
|
873 |
"lr_scheduler": ""
|
874 |
-
}
|
|
|
|
|
|
|
875 |
},
|
876 |
{
|
877 |
"case_id": "34",
|
@@ -959,6 +995,7 @@
|
|
959 |
"trust_remote_code": false,
|
960 |
"rotary_adjacent_pairs": false
|
961 |
},
|
|
|
962 |
"training_args": {
|
963 |
"atol": 0.05,
|
964 |
"lr": 0.001,
|
@@ -970,7 +1007,10 @@
|
|
970 |
"act_fn": "gelu",
|
971 |
"clip_grad_norm": 0.1,
|
972 |
"lr_scheduler": ""
|
973 |
-
}
|
|
|
|
|
|
|
974 |
},
|
975 |
{
|
976 |
"case_id": "35",
|
@@ -1058,6 +1098,7 @@
|
|
1058 |
"trust_remote_code": false,
|
1059 |
"rotary_adjacent_pairs": false
|
1060 |
},
|
|
|
1061 |
"training_args": {
|
1062 |
"atol": 0.05,
|
1063 |
"lr": 0.001,
|
@@ -1069,7 +1110,10 @@
|
|
1069 |
"act_fn": "gelu",
|
1070 |
"clip_grad_norm": 0.1,
|
1071 |
"lr_scheduler": ""
|
1072 |
-
}
|
|
|
|
|
|
|
1073 |
},
|
1074 |
{
|
1075 |
"case_id": "36",
|
@@ -1152,6 +1196,7 @@
|
|
1152 |
"trust_remote_code": false,
|
1153 |
"rotary_adjacent_pairs": false
|
1154 |
},
|
|
|
1155 |
"training_args": {
|
1156 |
"atol": 0.05,
|
1157 |
"lr": 0.001,
|
@@ -1163,7 +1208,10 @@
|
|
1163 |
"act_fn": "gelu",
|
1164 |
"clip_grad_norm": 0.1,
|
1165 |
"lr_scheduler": ""
|
1166 |
-
}
|
|
|
|
|
|
|
1167 |
},
|
1168 |
{
|
1169 |
"case_id": "37",
|
@@ -1251,6 +1299,7 @@
|
|
1251 |
"trust_remote_code": false,
|
1252 |
"rotary_adjacent_pairs": false
|
1253 |
},
|
|
|
1254 |
"training_args": {
|
1255 |
"atol": 0.05,
|
1256 |
"lr": 0.001,
|
@@ -1262,7 +1311,10 @@
|
|
1262 |
"act_fn": "gelu",
|
1263 |
"clip_grad_norm": 0.1,
|
1264 |
"lr_scheduler": ""
|
1265 |
-
}
|
|
|
|
|
|
|
1266 |
},
|
1267 |
{
|
1268 |
"case_id": "38",
|
@@ -1345,6 +1397,7 @@
|
|
1345 |
"trust_remote_code": false,
|
1346 |
"rotary_adjacent_pairs": false
|
1347 |
},
|
|
|
1348 |
"training_args": {
|
1349 |
"atol": 0.05,
|
1350 |
"lr": 0.001,
|
@@ -1356,7 +1409,10 @@
|
|
1356 |
"act_fn": "gelu",
|
1357 |
"clip_grad_norm": 0.1,
|
1358 |
"lr_scheduler": ""
|
1359 |
-
}
|
|
|
|
|
|
|
1360 |
},
|
1361 |
{
|
1362 |
"case_id": "4",
|
@@ -1441,6 +1497,7 @@
|
|
1441 |
"trust_remote_code": false,
|
1442 |
"rotary_adjacent_pairs": false
|
1443 |
},
|
|
|
1444 |
"training_args": {
|
1445 |
"atol": 0.05,
|
1446 |
"lr": 0.001,
|
@@ -1452,7 +1509,10 @@
|
|
1452 |
"act_fn": "gelu",
|
1453 |
"clip_grad_norm": 0.1,
|
1454 |
"lr_scheduler": ""
|
1455 |
-
}
|
|
|
|
|
|
|
1456 |
},
|
1457 |
{
|
1458 |
"case_id": "8",
|
@@ -1540,6 +1600,7 @@
|
|
1540 |
"trust_remote_code": false,
|
1541 |
"rotary_adjacent_pairs": false
|
1542 |
},
|
|
|
1543 |
"training_args": {
|
1544 |
"atol": 0.05,
|
1545 |
"lr": 0.01,
|
@@ -1551,7 +1612,10 @@
|
|
1551 |
"act_fn": "gelu",
|
1552 |
"clip_grad_norm": 1.0,
|
1553 |
"lr_scheduler": ""
|
1554 |
-
}
|
|
|
|
|
|
|
1555 |
},
|
1556 |
{
|
1557 |
"case_id": "ioi",
|
@@ -1568,7 +1632,8 @@
|
|
1568 |
"file_name": "ll_model_100_100_40.pth",
|
1569 |
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_100_100_40.pth"
|
1570 |
}
|
1571 |
-
]
|
|
|
1572 |
},
|
1573 |
{
|
1574 |
"case_id": "ioi_next_token",
|
@@ -1589,7 +1654,8 @@
|
|
1589 |
"file_name": "training_args.json",
|
1590 |
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/training_args.json"
|
1591 |
}
|
1592 |
-
]
|
|
|
1593 |
}
|
1594 |
]
|
1595 |
}
|
|
|
91 |
"trust_remote_code": false,
|
92 |
"rotary_adjacent_pairs": false
|
93 |
},
|
94 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg_510.pkl",
|
95 |
"training_args": {
|
96 |
"atol": 0.05,
|
97 |
"lr": 0.01,
|
|
|
103 |
"act_fn": "gelu",
|
104 |
"clip_grad_norm": 1.0,
|
105 |
"lr_scheduler": ""
|
106 |
+
},
|
107 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta_510.json",
|
108 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_510.pth",
|
109 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl"
|
110 |
},
|
111 |
{
|
112 |
"case_id": "13",
|
|
|
189 |
"trust_remote_code": false,
|
190 |
"rotary_adjacent_pairs": false
|
191 |
},
|
192 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg_510.pkl",
|
193 |
"training_args": {
|
194 |
"atol": 0.05,
|
195 |
"lr": 0.01,
|
|
|
201 |
"act_fn": "gelu",
|
202 |
"clip_grad_norm": 1.0,
|
203 |
"lr_scheduler": ""
|
204 |
+
},
|
205 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta_510.json",
|
206 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_510.pth",
|
207 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl"
|
208 |
},
|
209 |
{
|
210 |
"case_id": "18",
|
|
|
289 |
"trust_remote_code": false,
|
290 |
"rotary_adjacent_pairs": false
|
291 |
},
|
292 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg_510.pkl",
|
293 |
"training_args": {
|
294 |
"atol": 0.05,
|
295 |
"lr": 0.001,
|
|
|
301 |
"act_fn": "gelu",
|
302 |
"clip_grad_norm": 0.1,
|
303 |
"lr_scheduler": ""
|
304 |
+
},
|
305 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta_510.json",
|
306 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_510.pth",
|
307 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl"
|
308 |
},
|
309 |
{
|
310 |
"case_id": "19",
|
|
|
387 |
"trust_remote_code": false,
|
388 |
"rotary_adjacent_pairs": false
|
389 |
},
|
390 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg_510.pkl",
|
391 |
"training_args": {
|
392 |
"atol": 0.05,
|
393 |
"lr": 0.001,
|
|
|
399 |
"act_fn": "gelu",
|
400 |
"clip_grad_norm": 0.1,
|
401 |
"lr_scheduler": ""
|
402 |
+
},
|
403 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta_510.json",
|
404 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_510.pth",
|
405 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl"
|
406 |
},
|
407 |
{
|
408 |
"case_id": "20",
|
|
|
494 |
"trust_remote_code": false,
|
495 |
"rotary_adjacent_pairs": false
|
496 |
},
|
497 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg_1110.pkl",
|
498 |
"training_args": {
|
499 |
"atol": 0.05,
|
500 |
"lr": 0.001,
|
|
|
506 |
"act_fn": "gelu",
|
507 |
"clip_grad_norm": 0.1,
|
508 |
"lr_scheduler": ""
|
509 |
+
},
|
510 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta_1110.json",
|
511 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_1110.pth",
|
512 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl"
|
513 |
},
|
514 |
{
|
515 |
"case_id": "21",
|
|
|
592 |
"trust_remote_code": false,
|
593 |
"rotary_adjacent_pairs": false
|
594 |
},
|
595 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg_510.pkl",
|
596 |
"training_args": {
|
597 |
"atol": 0.05,
|
598 |
"lr": 0.01,
|
|
|
604 |
"act_fn": "gelu",
|
605 |
"clip_grad_norm": 1.0,
|
606 |
"lr_scheduler": ""
|
607 |
+
},
|
608 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta_510.json",
|
609 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_510.pth",
|
610 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl"
|
611 |
},
|
612 |
{
|
613 |
"case_id": "24",
|
|
|
690 |
"trust_remote_code": false,
|
691 |
"rotary_adjacent_pairs": false
|
692 |
},
|
693 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg_510.pkl",
|
694 |
"training_args": {
|
695 |
"atol": 0.05,
|
696 |
"lr": 0.01,
|
|
|
702 |
"act_fn": "gelu",
|
703 |
"clip_grad_norm": 1.0,
|
704 |
"lr_scheduler": ""
|
705 |
+
},
|
706 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta_510.json",
|
707 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_510.pth",
|
708 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl"
|
709 |
},
|
710 |
{
|
711 |
"case_id": "3",
|
|
|
789 |
"trust_remote_code": false,
|
790 |
"rotary_adjacent_pairs": false
|
791 |
},
|
792 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg_10110.pkl",
|
793 |
"training_args": {
|
794 |
"atol": 0.05,
|
795 |
"lr": 0.001,
|
|
|
801 |
"act_fn": "gelu",
|
802 |
"clip_grad_norm": 0.1,
|
803 |
"lr_scheduler": ""
|
804 |
+
},
|
805 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta_10110.json",
|
806 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_10110.pth",
|
807 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl"
|
808 |
},
|
809 |
{
|
810 |
"case_id": "33",
|
|
|
892 |
"trust_remote_code": false,
|
893 |
"rotary_adjacent_pairs": false
|
894 |
},
|
895 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg_510.pkl",
|
896 |
"training_args": {
|
897 |
"atol": 0.05,
|
898 |
"lr": 0.001,
|
|
|
904 |
"act_fn": "gelu",
|
905 |
"clip_grad_norm": 0.1,
|
906 |
"lr_scheduler": ""
|
907 |
+
},
|
908 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta_510.json",
|
909 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_510.pth",
|
910 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl"
|
911 |
},
|
912 |
{
|
913 |
"case_id": "34",
|
|
|
995 |
"trust_remote_code": false,
|
996 |
"rotary_adjacent_pairs": false
|
997 |
},
|
998 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg_510.pkl",
|
999 |
"training_args": {
|
1000 |
"atol": 0.05,
|
1001 |
"lr": 0.001,
|
|
|
1007 |
"act_fn": "gelu",
|
1008 |
"clip_grad_norm": 0.1,
|
1009 |
"lr_scheduler": ""
|
1010 |
+
},
|
1011 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta_510.json",
|
1012 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_510.pth",
|
1013 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl"
|
1014 |
},
|
1015 |
{
|
1016 |
"case_id": "35",
|
|
|
1098 |
"trust_remote_code": false,
|
1099 |
"rotary_adjacent_pairs": false
|
1100 |
},
|
1101 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg_510.pkl",
|
1102 |
"training_args": {
|
1103 |
"atol": 0.05,
|
1104 |
"lr": 0.001,
|
|
|
1110 |
"act_fn": "gelu",
|
1111 |
"clip_grad_norm": 0.1,
|
1112 |
"lr_scheduler": ""
|
1113 |
+
},
|
1114 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta_510.json",
|
1115 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_510.pth",
|
1116 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl"
|
1117 |
},
|
1118 |
{
|
1119 |
"case_id": "36",
|
|
|
1196 |
"trust_remote_code": false,
|
1197 |
"rotary_adjacent_pairs": false
|
1198 |
},
|
1199 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg_10110.pkl",
|
1200 |
"training_args": {
|
1201 |
"atol": 0.05,
|
1202 |
"lr": 0.001,
|
|
|
1208 |
"act_fn": "gelu",
|
1209 |
"clip_grad_norm": 0.1,
|
1210 |
"lr_scheduler": ""
|
1211 |
+
},
|
1212 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta_10110.json",
|
1213 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_10110.pth",
|
1214 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl"
|
1215 |
},
|
1216 |
{
|
1217 |
"case_id": "37",
|
|
|
1299 |
"trust_remote_code": false,
|
1300 |
"rotary_adjacent_pairs": false
|
1301 |
},
|
1302 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg_510.pkl",
|
1303 |
"training_args": {
|
1304 |
"atol": 0.05,
|
1305 |
"lr": 0.001,
|
|
|
1311 |
"act_fn": "gelu",
|
1312 |
"clip_grad_norm": 0.1,
|
1313 |
"lr_scheduler": ""
|
1314 |
+
},
|
1315 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta_510.json",
|
1316 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_510.pth",
|
1317 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl"
|
1318 |
},
|
1319 |
{
|
1320 |
"case_id": "38",
|
|
|
1397 |
"trust_remote_code": false,
|
1398 |
"rotary_adjacent_pairs": false
|
1399 |
},
|
1400 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/ll_model_cfg_510.pkl",
|
1401 |
"training_args": {
|
1402 |
"atol": 0.05,
|
1403 |
"lr": 0.001,
|
|
|
1409 |
"act_fn": "gelu",
|
1410 |
"clip_grad_norm": 0.1,
|
1411 |
"lr_scheduler": ""
|
1412 |
+
},
|
1413 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/meta_510.json",
|
1414 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/ll_model_510.pth",
|
1415 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/edges.pkl"
|
1416 |
},
|
1417 |
{
|
1418 |
"case_id": "4",
|
|
|
1497 |
"trust_remote_code": false,
|
1498 |
"rotary_adjacent_pairs": false
|
1499 |
},
|
1500 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg_510.pkl",
|
1501 |
"training_args": {
|
1502 |
"atol": 0.05,
|
1503 |
"lr": 0.001,
|
|
|
1509 |
"act_fn": "gelu",
|
1510 |
"clip_grad_norm": 0.1,
|
1511 |
"lr_scheduler": ""
|
1512 |
+
},
|
1513 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta_510.json",
|
1514 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_510.pth",
|
1515 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl"
|
1516 |
},
|
1517 |
{
|
1518 |
"case_id": "8",
|
|
|
1600 |
"trust_remote_code": false,
|
1601 |
"rotary_adjacent_pairs": false
|
1602 |
},
|
1603 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_cfg_510.pkl",
|
1604 |
"training_args": {
|
1605 |
"atol": 0.05,
|
1606 |
"lr": 0.01,
|
|
|
1612 |
"act_fn": "gelu",
|
1613 |
"clip_grad_norm": 1.0,
|
1614 |
"lr_scheduler": ""
|
1615 |
+
},
|
1616 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/meta_510.json",
|
1617 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_510.pth",
|
1618 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/edges.pkl"
|
1619 |
},
|
1620 |
{
|
1621 |
"case_id": "ioi",
|
|
|
1632 |
"file_name": "ll_model_100_100_40.pth",
|
1633 |
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_100_100_40.pth"
|
1634 |
}
|
1635 |
+
],
|
1636 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_100_100_40.pth"
|
1637 |
},
|
1638 |
{
|
1639 |
"case_id": "ioi_next_token",
|
|
|
1654 |
"file_name": "training_args.json",
|
1655 |
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/training_args.json"
|
1656 |
}
|
1657 |
+
],
|
1658 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_100_100_40.pth"
|
1659 |
}
|
1660 |
]
|
1661 |
}
|
benchmark_metadata_croissant.json
CHANGED
@@ -197,6 +197,66 @@
|
|
197 |
}
|
198 |
}
|
199 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
200 |
{
|
201 |
"@type": "cr:Field",
|
202 |
"@id": "training_args.atol",
|
|
|
197 |
}
|
198 |
}
|
199 |
},
|
200 |
+
{
|
201 |
+
"@type": "cr:Field",
|
202 |
+
"@id": "transformer_cfg_file_url",
|
203 |
+
"name": "transformer_cfg_file_url",
|
204 |
+
"description": "Column 'transformer_cfg_file_url' from the parquet file describing all the cases in the benchmark.",
|
205 |
+
"dataType": "sc:Text",
|
206 |
+
"source": {
|
207 |
+
"fileSet": {
|
208 |
+
"@id": "benchmark-cases-parquet"
|
209 |
+
},
|
210 |
+
"extract": {
|
211 |
+
"column": "transformer_cfg_file_url"
|
212 |
+
}
|
213 |
+
}
|
214 |
+
},
|
215 |
+
{
|
216 |
+
"@type": "cr:Field",
|
217 |
+
"@id": "training_args_file_url",
|
218 |
+
"name": "training_args_file_url",
|
219 |
+
"description": "Column 'training_args_file_url' from the parquet file describing all the cases in the benchmark.",
|
220 |
+
"dataType": "sc:Text",
|
221 |
+
"source": {
|
222 |
+
"fileSet": {
|
223 |
+
"@id": "benchmark-cases-parquet"
|
224 |
+
},
|
225 |
+
"extract": {
|
226 |
+
"column": "training_args_file_url"
|
227 |
+
}
|
228 |
+
}
|
229 |
+
},
|
230 |
+
{
|
231 |
+
"@type": "cr:Field",
|
232 |
+
"@id": "weights_file_url",
|
233 |
+
"name": "weights_file_url",
|
234 |
+
"description": "Column 'weights_file_url' from the parquet file describing all the cases in the benchmark.",
|
235 |
+
"dataType": "sc:Text",
|
236 |
+
"source": {
|
237 |
+
"fileSet": {
|
238 |
+
"@id": "benchmark-cases-parquet"
|
239 |
+
},
|
240 |
+
"extract": {
|
241 |
+
"column": "weights_file_url"
|
242 |
+
}
|
243 |
+
}
|
244 |
+
},
|
245 |
+
{
|
246 |
+
"@type": "cr:Field",
|
247 |
+
"@id": "circuit_file_url",
|
248 |
+
"name": "circuit_file_url",
|
249 |
+
"description": "Column 'circuit_file_url' from the parquet file describing all the cases in the benchmark.",
|
250 |
+
"dataType": "sc:Text",
|
251 |
+
"source": {
|
252 |
+
"fileSet": {
|
253 |
+
"@id": "benchmark-cases-parquet"
|
254 |
+
},
|
255 |
+
"extract": {
|
256 |
+
"column": "circuit_file_url"
|
257 |
+
}
|
258 |
+
}
|
259 |
+
},
|
260 |
{
|
261 |
"@type": "cr:Field",
|
262 |
"@id": "training_args.atol",
|