iarcuschin committed on
Commit
f230d44
·
1 Parent(s): e43ebe2

Update metadata

Browse files
benchmark_cases_metadata.csv CHANGED
@@ -1,19 +1,48 @@
1
- case_id,url,task_description,max_seq_len,min_seq_len,transformer_cfg_file_url,training_args_file_url,weights_file_url,circuit_file_url,transformer_cfg.n_layers,transformer_cfg.d_model,transformer_cfg.n_ctx,transformer_cfg.d_head,transformer_cfg.model_name,transformer_cfg.n_heads,transformer_cfg.d_mlp,transformer_cfg.act_fn,transformer_cfg.d_vocab,transformer_cfg.eps,transformer_cfg.use_attn_result,transformer_cfg.use_attn_scale,transformer_cfg.use_split_qkv_input,transformer_cfg.use_hook_mlp_in,transformer_cfg.use_attn_in,transformer_cfg.use_local_attn,transformer_cfg.original_architecture,transformer_cfg.from_checkpoint,transformer_cfg.tokenizer_name,transformer_cfg.init_mode,transformer_cfg.normalization_type,transformer_cfg.n_devices,transformer_cfg.attention_dir,transformer_cfg.attn_only,transformer_cfg.seed,transformer_cfg.initializer_range,transformer_cfg.init_weights,transformer_cfg.scale_attn_by_inverse_layer_idx,transformer_cfg.positional_embedding_type,transformer_cfg.final_rms,transformer_cfg.d_vocab_out,transformer_cfg.parallel_attn_mlp,transformer_cfg.n_params,transformer_cfg.use_hook_tokens,transformer_cfg.gated_mlp,transformer_cfg.default_prepend_bos,transformer_cfg.dtype,transformer_cfg.tokenizer_prepends_bos,transformer_cfg.post_embedding_ln,transformer_cfg.rotary_base,transformer_cfg.trust_remote_code,transformer_cfg.rotary_adjacent_pairs,training_args.atol,training_args.lr,training_args.use_single_loss,training_args.iit_weight,training_args.behavior_weight,training_args.strict_weight,training_args.epochs,training_args.act_fn,training_args.clip_grad_norm,training_args.lr_scheduler,training_args.model_pair,training_args.same_size,training_args.seed,training_args.batch_size,training_args.include_mlp,training_args.next_token,training_args.detach_while_caching,training_args.non_ioi_thresh,training_args.use_per_token_check,training_args.num_workers,training_args.early_stop,training_args.scheduler_val_metric,training_args.scheduler_mode,training_args.val_IIA_sampling,training_args.use_all_tokens_for_behavior,training_args.siit_sampling,training_args.optimizer_kwargs.betas
2
- 11,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/11,Counts the number of words in a sequence based on their length.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl,2,12,10,3,custom,4,48,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1460593486680443,True,False,standard,False,5,False,3456,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,,True,,,True,True,True,,True,,True,,,,True,,
3
- 13,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/13,"Analyzes the trend (increasing, decreasing, constant) of numeric tokens.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl,2,20,10,5,custom,4,80,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.1460593486680443,True,False,standard,False,3,False,9600,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,,True,,,True,True,True,,True,,True,,,,True,,
4
- 18,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/18,"Classify each token based on its frequency as 'rare', 'common', or 'frequent'.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl,2,26,10,6,custom,4,104,gelu,7,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.12344267996967354,True,False,standard,False,3,False,15808,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.001,False,1.0,1.0,1.0,2000.0,gelu,0.1,,strict,True,,,True,True,True,,True,,True,,,,True,,
5
- 19,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/19,Removes consecutive duplicate tokens from a sequence.,15,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl,2,32,15,8,custom,4,128,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.15689290811054724,True,False,standard,False,3,False,24576,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,,True,,,True,True,True,,True,,True,,,,True,,
6
- 20,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/20,Detect spam messages based on appearance of spam keywords.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl,2,13,10,3,custom,4,52,gelu,14,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16,True,False,standard,False,2,False,3952,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.01,False,1.0,1.0,0.4,2000.0,gelu,0.1,,strict,True,,,True,True,True,,True,,True,,,,True,,
7
- 21,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/21,Extract unique tokens from a string,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl,4,50,10,12,custom,4,200,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.09847319278346618,True,False,standard,False,3,False,118400,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.0005,False,1.0,1.0,0.5,2000.0,gelu,0.1,,strict,True,,,True,True,True,,True,,True,,,,True,,
8
- 26,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/26,Creates a cascading effect by repeating each token in sequence incrementally.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/edges.pkl,2,21,10,5,custom,4,84,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12344267996967354,True,False,standard,False,27,False,10416,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.01,False,1.0,1.0,0.4,2000.0,gelu,0.1,,strict,True,,,True,True,True,,True,,True,,,,True,,
9
- 29,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/29,Creates abbreviations for each token in the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/edges.pkl,2,13,10,3,custom,4,52,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1539600717839002,True,False,standard,False,8,False,3952,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.01,False,1.0,1.0,0.4,2000.0,gelu,0.1,,strict,True,,,True,True,True,,True,,True,,,,True,,
10
- 3,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/3,Returns the fraction of 'x' in the input up to the i-th position for all i.,5,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl,2,12,5,3,custom,4,48,gelu,6,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.22188007849009167,True,False,standard,False,1,False,3456,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.01,False,1.0,1.0,10.0,2000.0,gelu,0.1,,strict,False,,,True,True,True,,True,,True,,,,True,,
11
- 33,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/33,Checks if each token's length is odd or even.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl,2,4,10,1,custom,4,16,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.17457431218879393,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,,True,,,True,True,True,,True,,True,,,,True,,
12
- 34,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/34,Calculate the ratio of vowels to consonants in each word.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl,2,16,10,4,custom,4,64,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,5,False,6144,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.01,False,1.0,1.0,1.0,2000.0,gelu,0.1,,strict,True,,,True,True,True,,True,,True,,,,True,,
13
- 35,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/35,Alternates capitalization of each character in words.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl,2,9,10,2,custom,4,36,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1539600717839002,True,False,standard,False,8,False,1872,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.01,False,1.0,1.0,1.0,2000.0,gelu,0.1,,strict,True,,,True,True,True,,True,,True,,,,True,,
14
- 36,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/36,"Classifies each token as 'positive', 'negative', or 'neutral' based on emojis.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl,2,6,10,1,custom,4,24,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.19402850002906638,True,False,standard,False,3,False,768,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.01,False,1.0,1.0,1.0,2000.0,gelu,0.1,,strict,True,,,True,True,True,,True,,True,,,,True,,
15
- 37,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/37,Reverses each word in the sequence except for specified exclusions.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl,2,12,10,3,custom,4,48,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1539600717839002,True,False,standard,False,8,False,3456,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.01,False,1.0,1.0,1.0,2000.0,gelu,0.1,,strict,True,,,True,True,True,,True,,True,,,,True,,
16
- 4,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/4,Return fraction of previous open tokens minus the fraction of close tokens.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl,2,20,10,5,custom,4,80,gelu,7,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.17056057308448835,True,False,standard,False,1,False,9600,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,,True,,,True,True,True,,True,,True,,,,True,,
17
- 7,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/7,Returns the number of times each token occurs in the input.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/edges.pkl,2,17,10,4,custom,4,68,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.15689290811054724,True,False,standard,False,10,False,6800,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.01,False,1.0,1.0,0.5,2000.0,gelu,0.1,,strict,False,1234.0,256.0,False,False,True,,True,,True,,,,True,,
18
- ioi,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi,Indirect Object Identification (IOI) task.,16,16,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/edges.pkl,6,64,1024,16,gpt2,4,3072,gelu_new,50257,1e-05,False,True,False,False,False,False,GPT2LMHeadModel,False,gpt2,gpt2,LNPre,1,causal,False,,0.02886751345948129,False,False,standard,False,50257,False,84934656,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.001,False,1.0,1.0,0.4,,,1.0,,,True,0.0,512.0,True,False,True,0.65,False,0.0,True,"val/accuracy,val/IIA",max,random,False,individual,"0.9,0.9"
19
- ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token,"Indirect Object Identification (IOI) task, trained using next token prediction.",16,16,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/edges.pkl,6,64,1024,16,gpt2,4,3072,gelu_new,50257,1e-05,False,True,False,False,False,False,GPT2LMHeadModel,False,gpt2,gpt2,LNPre,1,causal,False,,0.02886751345948129,True,False,standard,False,50257,False,2457600,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.001,False,1.0,1.0,0.4,,,1.0,,,True,,256.0,True,True,True,0.65,False,0.0,True,"val/accuracy,val/IIA",max,,True,,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ case_id,url,task_description,max_seq_len,min_seq_len,transformer_cfg_file_url,training_args_file_url,weights_file_url,circuit_file_url,transformer_cfg.n_layers,transformer_cfg.d_model,transformer_cfg.n_ctx,transformer_cfg.d_head,transformer_cfg.model_name,transformer_cfg.n_heads,transformer_cfg.d_mlp,transformer_cfg.act_fn,transformer_cfg.d_vocab,transformer_cfg.eps,transformer_cfg.use_attn_result,transformer_cfg.use_attn_scale,transformer_cfg.use_split_qkv_input,transformer_cfg.use_hook_mlp_in,transformer_cfg.use_attn_in,transformer_cfg.use_local_attn,transformer_cfg.original_architecture,transformer_cfg.from_checkpoint,transformer_cfg.tokenizer_name,transformer_cfg.init_mode,transformer_cfg.normalization_type,transformer_cfg.n_devices,transformer_cfg.attention_dir,transformer_cfg.attn_only,transformer_cfg.seed,transformer_cfg.initializer_range,transformer_cfg.init_weights,transformer_cfg.scale_attn_by_inverse_layer_idx,transformer_cfg.positional_embedding_type,transformer_cfg.final_rms,transformer_cfg.d_vocab_out,transformer_cfg.parallel_attn_mlp,transformer_cfg.n_params,transformer_cfg.use_hook_tokens,transformer_cfg.gated_mlp,transformer_cfg.default_prepend_bos,transformer_cfg.dtype,transformer_cfg.tokenizer_prepends_bos,transformer_cfg.post_embedding_ln,transformer_cfg.rotary_base,transformer_cfg.trust_remote_code,transformer_cfg.rotary_adjacent_pairs,transformer_cfg.load_in_4bit,training_args.output_dir,training_args.atol,training_args.lr,training_args.use_single_loss,training_args.iit_weight,training_args.behavior_weight,training_args.strict_weight,training_args.epochs,training_args.early_stop_accuracy_threshold,training_args.act_fn,training_args.use_wandb,training_args.save_model_to_wandb,training_args.clip_grad_norm,training_args.lr_scheduler,training_args.model_pair,training_args.same_size,training_args.seed,training_args.batch_size,training_args.include_mlp,training_args.detach_while_caching,training_args.scheduler_val_metric,training_args.siit_sampling,training_args.val_iia_sampling,training_args.next_token,training_args.non_ioi_thresh,training_args.use_per_token_check,training_args.num_workers,training_args.early_stop,training_args.scheduler_mode,training_args.val_IIA_sampling,training_args.use_all_tokens_for_behavior,training_args.optimizer_kwargs.betas
2
+ 101,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/101,Check if each element is a square of an integer.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12199885626608374,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
3
+ 103,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/103,Swap consecutive numbers in a list,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/edges.pkl,3,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.10787197799411874,True,False,standard,False,11,False,20736,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
4
+ 11,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/11,Counts the number of words in a sequence based on their length.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl,2,12,10,3,custom,4,48,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1460593486680443,True,False,standard,False,5,False,3456,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,0.4,500.0,,gelu,True,True,1.0,,,True,,,True,True,,,,True,,True,,True,,,True,
5
+ 110,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/110,"Inserts zeros between each element, removing the latter half of the list.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/edges.pkl,2,20,10,5,custom,4,80,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.11925695879998878,True,False,standard,False,11,False,9600,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
6
+ 111,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/111,Returns the last element of the sequence and pads the rest with zeros.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/edges.pkl,3,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.09847319278346618,True,False,standard,False,11,False,20736,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
7
+ 113,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/113,"Inverts the sequence if it is sorted in ascending order, otherwise leaves it unchanged.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/edges.pkl,7,88,10,22,custom,4,352,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.0512147519731584,True,False,standard,False,30,False,650496,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
8
+ 114,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/114,Apply a logarithm base 10 to each element of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/edges.pkl,2,4,10,1,custom,4,16,gelu,12,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.14368424162141993,True,False,standard,False,10,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
9
+ 122,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/122,Check if each number is divisible by 3.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12199885626608374,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
10
+ 124,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/124,Check if all elements in a list are equal.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/edges.pkl,3,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.11547005383792516,True,False,standard,False,2,False,20736,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
11
+ 129,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/129,Checks if all elements are a multiple of n (set the default at 2).,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/edges.pkl,3,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.10504514628777804,True,False,standard,False,2,False,576,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
12
+ 13,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/13,"Analyzes the trend (increasing, decreasing, constant) of numeric tokens.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl,2,20,10,5,custom,4,80,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.1460593486680443,True,False,standard,False,3,False,9600,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,0.4,500.0,,gelu,True,True,1.0,,,True,,,True,True,,,,True,,True,,True,,,True,
13
+ 14,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/14,Returns the count of 'a' in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/edges.pkl,2,8,10,2,custom,4,32,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.15689290811054724,True,False,standard,False,10,False,1536,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
14
+ 18,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/18,"Classify each token based on its frequency as 'rare', 'common', or 'frequent'.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl,2,26,10,6,custom,4,104,gelu,7,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.12344267996967354,True,False,standard,False,3,False,15808,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.001,False,1.0,1.0,1.0,2000.0,,gelu,True,True,0.1,,strict,True,,,True,True,,,,True,,True,,True,,,True,
15
+ 19,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/19,Removes consecutive duplicate tokens from a sequence.,15,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl,2,32,15,8,custom,4,128,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.15689290811054724,True,False,standard,False,3,False,24576,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.001,False,1.0,1.0,0.4,2000.0,,gelu,True,True,0.1,,,True,,,True,True,,,,True,,True,,True,,,True,
16
+ 2,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/2,Reverse the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/edges.pkl,4,56,10,14,custom,4,224,gelu,28,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.07593263966019993,True,False,standard,False,26,False,150528,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
17
+ 20,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/20,Detect spam messages based on appearance of spam keywords.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl,2,13,10,3,custom,4,52,gelu,14,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16,True,False,standard,False,2,False,3952,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,0.4,2000.0,,gelu,True,True,0.1,,strict,True,,,True,True,,,,True,,True,,True,,,True,
18
+ 21,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/21,Extract unique tokens from a string,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl,4,50,10,12,custom,4,200,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.09847319278346618,True,False,standard,False,3,False,118400,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.0005,False,1.0,1.0,0.5,2000.0,,gelu,True,True,0.1,,strict,True,,,True,True,,,,True,,True,,True,,,True,
19
+ 24,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/24,Identifies the first occurrence of each token in a sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl,2,36,10,9,custom,4,144,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1885618083164127,True,False,standard,False,3,False,31104,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
20
+ 25,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/25,Normalizes token frequencies in a sequence to a range between 0 and 1.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/edges.pkl,2,62,10,15,custom,4,248,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.08295613557843402,True,False,standard,False,56,False,91264,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
21
+ 26,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/26,Creates a cascading effect by repeating each token in sequence incrementally.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/edges.pkl,2,21,10,5,custom,4,84,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12344267996967354,True,False,standard,False,27,False,10416,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,0.4,2000.0,,gelu,True,True,0.1,,strict,True,,,True,True,,,,True,,True,,True,,,True,
22
+ 29,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/29,Creates abbreviations for each token in the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/edges.pkl,2,13,10,3,custom,4,52,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1539600717839002,True,False,standard,False,8,False,3952,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,0.4,2000.0,,gelu,True,True,0.1,,strict,True,,,True,True,,,,True,,True,,True,,,True,
23
+ 3,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/3,Returns the fraction of 'x' in the input up to the i-th position for all i.,5,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl,2,12,5,3,custom,4,48,gelu,6,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.22188007849009167,True,False,standard,False,1,False,3456,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,10.0,2000.0,,gelu,True,True,0.1,,strict,False,,,True,True,,,,True,,True,,True,,,True,
24
+ 30,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/30,Tags numeric tokens in a sequence based on whether they fall within a given range.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12199885626608374,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
25
+ 31,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/31,Identify if tokens in the sequence are anagrams of the word 'listen'.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/edges.pkl,2,4,10,1,custom,4,16,gelu,11,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.17056057308448835,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
26
+ 33,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/33,Checks if each token's length is odd or even.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl,2,4,10,1,custom,4,16,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.17457431218879393,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.001,False,1.0,1.0,0.4,2000.0,,gelu,True,True,0.1,,,True,,,True,True,,,,True,,True,,True,,,True,
27
+ 34,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/34,Calculate the ratio of vowels to consonants in each word.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl,2,16,10,4,custom,4,64,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,5,False,6144,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,1.0,2000.0,,gelu,True,True,0.1,,strict,True,,,True,True,,,,True,,True,,True,,,True,
28
+ 35,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/35,Alternates capitalization of each character in words.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl,2,9,10,2,custom,4,36,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1539600717839002,True,False,standard,False,8,False,1872,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,1.0,2000.0,,gelu,True,True,0.1,,strict,True,,,True,True,,,,True,,True,,True,,,True,
29
+ 36,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/36,"Classifies each token as 'positive', 'negative', or 'neutral' based on emojis.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl,2,6,10,1,custom,4,24,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.19402850002906638,True,False,standard,False,3,False,768,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,1.0,2000.0,,gelu,True,True,0.1,,strict,True,,,True,True,,,,True,,True,,True,,,True,
30
+ 37,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/37,Reverses each word in the sequence except for specified exclusions.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl,2,12,10,3,custom,4,48,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1539600717839002,True,False,standard,False,8,False,3456,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,1.0,2000.0,,gelu,True,True,0.1,,strict,True,,,True,True,,,,True,,True,,True,,,True,
31
+ 39,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/39,Returns the fraction of 'x' in the input up to the i-th position for all i.,60,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/edges.pkl,2,120,60,30,custom,4,480,gelu,28,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.08432740427115679,True,False,standard,False,1,False,345600,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
32
+ 4,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/4,Return fraction of previous open tokens minus the fraction of close tokens.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl,2,20,10,5,custom,4,80,gelu,7,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.17056057308448835,True,False,standard,False,1,False,9600,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.001,False,1.0,1.0,0.4,2000.0,,gelu,True,True,0.1,,,True,,,True,True,,,,True,,True,,True,,,True,
33
+ 40,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/40,Sum the last and previous to last digits of a number,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/edges.pkl,2,4,10,1,custom,4,16,gelu,31,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.09847319278346618,True,False,standard,False,12,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
34
+ 44,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/44,Replaces each element with the number of elements greater than it in the sequence,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/edges.pkl,2,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.13719886811400708,True,False,standard,False,10,False,13824,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
35
+ 45,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/45,Doubles the first half of the sequence,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/edges.pkl,3,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.11094003924504584,True,False,standard,False,16,False,20736,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
36
+ 51,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/51,Checks if each element is a Fibonacci number,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/edges.pkl,2,4,10,1,custom,4,16,gelu,102,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.07525766947068778,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
37
+ 56,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/56,Sets every third element to zero.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.13333333333333333,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
38
+ 58,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/58,Mirrors the first half of the sequence to the second half.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/edges.pkl,3,32,10,8,custom,4,128,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.10415112878465911,True,False,standard,False,11,False,36864,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
39
+ 63,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/63,Replaces each element with the number of elements less than it in the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/edges.pkl,2,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.13719886811400708,True,False,standard,False,10,False,13824,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
40
+ 69,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/69,"Assign -1, 0, or 1 to each element of the input sequence based on its sign.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
41
+ 7,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/7,Returns the number of times each token occurs in the input.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/edges.pkl,2,17,10,4,custom,4,68,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.15689290811054724,True,False,standard,False,10,False,6800,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,0.5,2000.0,,gelu,True,True,0.1,,strict,False,1234.0,256.0,False,True,,,,False,,True,,True,,,True,
42
+ 79,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/79,Check if each number in a sequence is prime,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
43
+ 82,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/82,Halve the elements in the second half of the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/edges.pkl,4,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.1059625885652035,True,False,standard,False,16,False,27648,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
44
+ 86,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/86,"Check if each element is a power of 2. Return 1 if true, otherwise 0.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
45
+ 87,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/87,Binarize a sequence of integers using a threshold.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
46
+ 93,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/93,Swaps the nth with the n+1th element if n%2==1.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/edges.pkl,3,20,10,5,custom,4,80,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.10886621079036347,True,False,standard,False,11,False,14400,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
47
+ ioi,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi,Indirect Object Identification (IOI) task.,16,16,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/edges.pkl,6,64,1024,16,gpt2,4,3072,gelu_new,50257,1e-05,False,True,False,False,False,False,GPT2LMHeadModel,False,gpt2,gpt2,LNPre,1,causal,False,,0.02886751345948129,False,False,standard,False,50257,False,84934656,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.001,False,1.0,1.0,0.4,,,,True,True,1.0,,,True,0.0,512.0,True,True,"val/accuracy,val/IIA",individual,,False,0.65,False,0.0,True,max,random,False,"0.9,0.9"
48
+ ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token,"Indirect Object Identification (IOI) task, trained using next token prediction.",16,16,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/edges.pkl,6,64,1024,16,gpt2,4,3072,gelu_new,50257,1e-05,False,True,False,False,False,False,GPT2LMHeadModel,False,gpt2,gpt2,LNPre,1,causal,False,,0.02886751345948129,True,False,standard,False,50257,False,2457600,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.001,False,1.0,1.0,0.4,,,,True,True,1.0,,,True,,256.0,True,True,"val/accuracy,val/IIA",,,True,0.65,False,0.0,True,max,,True,
benchmark_cases_metadata.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9901890c05e11095ebb3dbd5710284edd09c37b40422eec02a126231f62f63d1
3
- size 64382
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e070cc1d81773e400a5c30ce2e9b328d370ef2e2f67083ac90fc0638e4d2da8
3
+ size 72322
benchmark_metadata.json CHANGED
The diff for this file is too large to render. See raw diff