{ "pass_1": [ { "Model": "gpt-4o-mini", "Domain": "Computation", "Pass_at_k": 0.9038123167155425 }, { "Model": "gpt-3.5-turbo", "Domain": "Computation", "Pass_at_k": 0.8340175953079179 }, { "Model": "Qwen2-72B-Instruct-GPTQ-Int4", "Domain": "Computation", "Pass_at_k": 0.8686217008797654 }, { "Model": "deepseek-coder-33b-instruct", "Domain": "Computation", "Pass_at_k": 0.8392961876832845 }, { "Model": "DeepSeek-Coder-V2-Lite-Instruct", "Domain": "Computation", "Pass_at_k": 0.8604105571847507 }, { "Model": "deepseek-coder-6.7b-instruct", "Domain": "Computation", "Pass_at_k": 0.8351906158357771 }, { "Model": "CodeLlama-34b-Instruct-hf", "Domain": "Computation", "Pass_at_k": 0.7607038123167156 }, { "Model": "CodeLlama-13b-Instruct-hf", "Domain": "Computation", "Pass_at_k": 0.8029325513196481 }, { "Model": "CodeLlama-7b-Instruct-hf", "Domain": "Computation", "Pass_at_k": 0.7712609970674487 }, { "Model": "CodeQwen1.5-7B-Chat", "Domain": "Computation", "Pass_at_k": 0.8516129032258064 }, { "Model": "Phi-3-medium-4k-instruct", "Domain": "Computation", "Pass_at_k": 0.7554252199413489 }, { "Model": "Llama-2-13b-chat-hf", "Domain": "Computation", "Pass_at_k": 0.8093841642228738 }, { "Model": "gpt-4o-mini", "Domain": "Network", "Pass_at_k": 0.703125 }, { "Model": "gpt-3.5-turbo", "Domain": "Network", "Pass_at_k": 0.58984375 }, { "Model": "Qwen2-72B-Instruct-GPTQ-Int4", "Domain": "Network", "Pass_at_k": 0.66796875 }, { "Model": "deepseek-coder-33b-instruct", "Domain": "Network", "Pass_at_k": 0.64453125 }, { "Model": "DeepSeek-Coder-V2-Lite-Instruct", "Domain": "Network", "Pass_at_k": 0.62109375 }, { "Model": "deepseek-coder-6.7b-instruct", "Domain": "Network", "Pass_at_k": 0.58984375 }, { "Model": "CodeLlama-34b-Instruct-hf", "Domain": "Network", "Pass_at_k": 0.6015625 }, { "Model": "CodeLlama-13b-Instruct-hf", "Domain": "Network", "Pass_at_k": 0.62109375 }, { "Model": "CodeLlama-7b-Instruct-hf", "Domain": "Network", "Pass_at_k": 0.60546875 }, { "Model": "CodeQwen1.5-7B-Chat", "Domain": "Network", "Pass_at_k": 0.609375 }, { "Model": "Phi-3-medium-4k-instruct", "Domain": "Network", "Pass_at_k": 0.6015625 }, { "Model": "Llama-2-13b-chat-hf", "Domain": "Network", "Pass_at_k": 0.53125 }, { "Model": "gpt-4o-mini", "Domain": "Visualization", "Pass_at_k": 0.5967741935483871 }, { "Model": "gpt-3.5-turbo", "Domain": "Visualization", "Pass_at_k": 0.489247311827957 }, { "Model": "Qwen2-72B-Instruct-GPTQ-Int4", "Domain": "Visualization", "Pass_at_k": 0.4946236559139785 }, { "Model": "deepseek-coder-33b-instruct", "Domain": "Visualization", "Pass_at_k": 0.5053763440860215 }, { "Model": "DeepSeek-Coder-V2-Lite-Instruct", "Domain": "Visualization", "Pass_at_k": 0.5 }, { "Model": "deepseek-coder-6.7b-instruct", "Domain": "Visualization", "Pass_at_k": 0.45698924731182794 }, { "Model": "CodeLlama-34b-Instruct-hf", "Domain": "Visualization", "Pass_at_k": 0.41935483870967744 }, { "Model": "CodeLlama-13b-Instruct-hf", "Domain": "Visualization", "Pass_at_k": 0.42473118279569894 }, { "Model": "CodeLlama-7b-Instruct-hf", "Domain": "Visualization", "Pass_at_k": 0.43548387096774194 }, { "Model": "CodeQwen1.5-7B-Chat", "Domain": "Visualization", "Pass_at_k": 0.478494623655914 }, { "Model": "Phi-3-medium-4k-instruct", "Domain": "Visualization", "Pass_at_k": 0.45161290322580644 }, { "Model": "Llama-2-13b-chat-hf", "Domain": "Visualization", "Pass_at_k": 0.34946236559139787 }, { "Model": "gpt-4o-mini", "Domain": "Basic", "Pass_at_k": 0.6915887850467289 }, { "Model": "gpt-3.5-turbo", "Domain": "Basic", "Pass_at_k": 0.5607476635514018 }, { "Model": "Qwen2-72B-Instruct-GPTQ-Int4", "Domain": "Basic", "Pass_at_k": 0.6915887850467289 }, { "Model": "deepseek-coder-33b-instruct", "Domain": "Basic", "Pass_at_k": 0.5981308411214953 }, { "Model": "DeepSeek-Coder-V2-Lite-Instruct", "Domain": "Basic", "Pass_at_k": 0.6542056074766355 }, { "Model": "deepseek-coder-6.7b-instruct", "Domain": "Basic", "Pass_at_k": 0.5794392523364486 }, { "Model": "CodeLlama-34b-Instruct-hf", "Domain": "Basic", "Pass_at_k": 0.5514018691588785 }, { "Model": "CodeLlama-13b-Instruct-hf", "Domain": "Basic", "Pass_at_k": 0.5887850467289719 }, { "Model": "CodeLlama-7b-Instruct-hf", "Domain": "Basic", "Pass_at_k": 0.5233644859813084 }, { "Model": "CodeQwen1.5-7B-Chat", "Domain": "Basic", "Pass_at_k": 0.6074766355140186 }, { "Model": "Phi-3-medium-4k-instruct", "Domain": "Basic", "Pass_at_k": 0.616822429906542 }, { "Model": "Llama-2-13b-chat-hf", "Domain": "Basic", "Pass_at_k": 0.4485981308411215 }, { "Model": "gpt-4o-mini", "Domain": "System", "Pass_at_k": 0.51 }, { "Model": "gpt-3.5-turbo", "Domain": "System", "Pass_at_k": 0.32 }, { "Model": "Qwen2-72B-Instruct-GPTQ-Int4", "Domain": "System", "Pass_at_k": 0.41 }, { "Model": "deepseek-coder-33b-instruct", "Domain": "System", "Pass_at_k": 0.46 }, { "Model": "DeepSeek-Coder-V2-Lite-Instruct", "Domain": "System", "Pass_at_k": 0.41 }, { "Model": "deepseek-coder-6.7b-instruct", "Domain": "System", "Pass_at_k": 0.36 }, { "Model": "CodeLlama-34b-Instruct-hf", "Domain": "System", "Pass_at_k": 0.35 }, { "Model": "CodeLlama-13b-Instruct-hf", "Domain": "System", "Pass_at_k": 0.34 }, { "Model": "CodeLlama-7b-Instruct-hf", "Domain": "System", "Pass_at_k": 0.36 }, { "Model": "CodeQwen1.5-7B-Chat", "Domain": "System", "Pass_at_k": 0.37 }, { "Model": "Phi-3-medium-4k-instruct", "Domain": "System", "Pass_at_k": 0.42 }, { "Model": "Llama-2-13b-chat-hf", "Domain": "System", "Pass_at_k": 0.19 }, { "Model": "gpt-4o-mini", "Domain": "Cryptography", "Pass_at_k": 0.43 }, { "Model": "gpt-3.5-turbo", "Domain": "Cryptography", "Pass_at_k": 0.31 }, { "Model": "Qwen2-72B-Instruct-GPTQ-Int4", "Domain": "Cryptography", "Pass_at_k": 0.36 }, { "Model": "deepseek-coder-33b-instruct", "Domain": "Cryptography", "Pass_at_k": 0.35 }, { "Model": "DeepSeek-Coder-V2-Lite-Instruct", "Domain": "Cryptography", "Pass_at_k": 0.38 }, { "Model": "deepseek-coder-6.7b-instruct", "Domain": "Cryptography", "Pass_at_k": 0.4 }, { "Model": "CodeLlama-34b-Instruct-hf", "Domain": "Cryptography", "Pass_at_k": 0.31 }, { "Model": "CodeLlama-13b-Instruct-hf", "Domain": "Cryptography", "Pass_at_k": 0.27 }, { "Model": "CodeLlama-7b-Instruct-hf", "Domain": "Cryptography", "Pass_at_k": 0.32 }, { "Model": "CodeQwen1.5-7B-Chat", "Domain": "Cryptography", "Pass_at_k": 0.37 }, { "Model": "Phi-3-medium-4k-instruct", "Domain": "Cryptography", "Pass_at_k": 0.35 }, { "Model": "Llama-2-13b-chat-hf", "Domain": "Cryptography", "Pass_at_k": 0.12 }, { "Model": "gpt-4o-mini", "Domain": "Mean", "Pass_at_k": 0.6392167158851098 }, { "Model": "gpt-3.5-turbo", "Domain": "Mean", "Pass_at_k": 0.5173093867812127 }, { "Model": "Qwen2-72B-Instruct-GPTQ-Int4", "Domain": "Mean", "Pass_at_k": 0.5821338153067455 }, { "Model": "deepseek-coder-33b-instruct", "Domain": "Mean", "Pass_at_k": 0.5662224371484669 }, { "Model": "DeepSeek-Coder-V2-Lite-Instruct", "Domain": "Mean", "Pass_at_k": 0.5709516524435644 }, { "Model": "deepseek-coder-6.7b-instruct", "Domain": "Mean", "Pass_at_k": 0.5369104775806756 }, { "Model": "CodeLlama-34b-Instruct-hf", "Domain": "Mean", "Pass_at_k": 0.49883717003087863 }, { "Model": "CodeLlama-13b-Instruct-hf", "Domain": "Mean", "Pass_at_k": 0.5079237551407199 }, { "Model": "CodeLlama-7b-Instruct-hf", "Domain": "Mean", "Pass_at_k": 0.5025963506694164 }, { "Model": "CodeQwen1.5-7B-Chat", "Domain": "Mean", "Pass_at_k": 0.5478265270659565 }, { "Model": "Phi-3-medium-4k-instruct", "Domain": "Mean", "Pass_at_k": 0.5325705088456162 }, { "Model": "Llama-2-13b-chat-hf", "Domain": "Mean", "Pass_at_k": 0.4081157767758989 }, { "Model": "gpt-4o-mini", "Domain": "Std", "Pass_at_k": 0.16679801914758088 }, { "Model": "gpt-3.5-turbo", "Domain": "Std", "Pass_at_k": 0.1950117243115276 }, { "Model": "Qwen2-72B-Instruct-GPTQ-Int4", "Domain": "Std", "Pass_at_k": 0.19393547652062595 }, { "Model": "deepseek-coder-33b-instruct", "Domain": "Std", "Pass_at_k": 0.1693855278154664 }, { "Model": "DeepSeek-Coder-V2-Lite-Instruct", "Domain": "Std", "Pass_at_k": 0.17923951210025596 }, { "Model": "deepseek-coder-6.7b-instruct", "Domain": "Std", "Pass_at_k": 0.17321135521991954 }, { "Model": "CodeLlama-34b-Instruct-hf", "Domain": "Std", "Pass_at_k": 0.17089125215414938 }, { "Model": "CodeLlama-13b-Instruct-hf", "Domain": "Std", "Pass_at_k": 0.19904938629943747 }, { "Model": "CodeLlama-7b-Instruct-hf", "Domain": "Std", "Pass_at_k": 0.16815110445446094 }, { "Model": "CodeQwen1.5-7B-Chat", "Domain": "Std", "Pass_at_k": 0.18313053955353828 }, { "Model": "Phi-3-medium-4k-instruct", "Domain": "Std", "Pass_at_k": 0.15105015549350911 }, { "Model": "Llama-2-13b-chat-hf", "Domain": "Std", "Pass_at_k": 0.24973689844845592 } ], "pass_5": [ { "Model": "gpt-4o-mini", "Domain": "Computation", "Pass_at_k": 0.9126099706744868 }, { "Model": "gpt-3.5-turbo", "Domain": "Computation", "Pass_at_k": 0.8733137829912023 }, { "Model": "Qwen2-72B-Instruct-GPTQ-Int4", "Domain": "Computation", "Pass_at_k": 0.9014662756598241 }, { "Model": "deepseek-coder-33b-instruct", "Domain": "Computation", "Pass_at_k": 0.8979472140762463 }, { "Model": "DeepSeek-Coder-V2-Lite-Instruct", "Domain": "Computation", "Pass_at_k": 0.8891495601173021 }, { "Model": "deepseek-coder-6.7b-instruct", "Domain": "Computation", "Pass_at_k": 0.8979472140762463 }, { "Model": "CodeLlama-34b-Instruct-hf", "Domain": "Computation", "Pass_at_k": 0.8510263929618769 }, { "Model": "CodeLlama-13b-Instruct-hf", "Domain": "Computation", "Pass_at_k": 0.898533724340176 }, { "Model": "CodeLlama-7b-Instruct-hf", "Domain": "Computation", "Pass_at_k": 0.8680351906158358 }, { "Model": "CodeQwen1.5-7B-Chat", "Domain": "Computation", "Pass_at_k": 0.9102639296187683 }, { "Model": "Phi-3-medium-4k-instruct", "Domain": "Computation", "Pass_at_k": 0.8510263929618769 }, { "Model": "Llama-2-13b-chat-hf", "Domain": "Computation", "Pass_at_k": 0.8768328445747801 }, { "Model": "gpt-4o-mini", "Domain": "Network", "Pass_at_k": 0.7265625 }, { "Model": "gpt-3.5-turbo", "Domain": "Network", "Pass_at_k": 0.62890625 }, { "Model": "Qwen2-72B-Instruct-GPTQ-Int4", "Domain": "Network", "Pass_at_k": 0.70703125 }, { "Model": "deepseek-coder-33b-instruct", "Domain": "Network", "Pass_at_k": 0.70703125 }, { "Model": "DeepSeek-Coder-V2-Lite-Instruct", "Domain": "Network", "Pass_at_k": 0.65625 }, { "Model": "deepseek-coder-6.7b-instruct", "Domain": "Network", "Pass_at_k": 0.63671875 }, { "Model": "CodeLlama-34b-Instruct-hf", "Domain": "Network", "Pass_at_k": 0.6328125 }, { "Model": "CodeLlama-13b-Instruct-hf", "Domain": "Network", "Pass_at_k": 0.65625 }, { "Model": "CodeLlama-7b-Instruct-hf", "Domain": "Network", "Pass_at_k": 0.63671875 }, { "Model": "CodeQwen1.5-7B-Chat", "Domain": "Network", "Pass_at_k": 0.640625 }, { "Model": "Phi-3-medium-4k-instruct", "Domain": "Network", "Pass_at_k": 0.67578125 }, { "Model": "Llama-2-13b-chat-hf", "Domain": "Network", "Pass_at_k": 0.55859375 }, { "Model": "gpt-4o-mini", "Domain": "Visualization", "Pass_at_k": 0.6182795698924731 }, { "Model": "gpt-3.5-turbo", "Domain": "Visualization", "Pass_at_k": 0.521505376344086 }, { "Model": "Qwen2-72B-Instruct-GPTQ-Int4", "Domain": "Visualization", "Pass_at_k": 0.5483870967741935 }, { "Model": "deepseek-coder-33b-instruct", "Domain": "Visualization", "Pass_at_k": 0.553763440860215 }, { "Model": "DeepSeek-Coder-V2-Lite-Instruct", "Domain": "Visualization", "Pass_at_k": 0.5376344086021505 }, { "Model": "deepseek-coder-6.7b-instruct", "Domain": "Visualization", "Pass_at_k": 0.553763440860215 }, { "Model": "CodeLlama-34b-Instruct-hf", "Domain": "Visualization", "Pass_at_k": 0.4838709677419355 }, { "Model": "CodeLlama-13b-Instruct-hf", "Domain": "Visualization", "Pass_at_k": 0.5161290322580645 }, { "Model": "CodeLlama-7b-Instruct-hf", "Domain": "Visualization", "Pass_at_k": 0.5161290322580645 }, { "Model": "CodeQwen1.5-7B-Chat", "Domain": "Visualization", "Pass_at_k": 0.553763440860215 }, { "Model": "Phi-3-medium-4k-instruct", "Domain": "Visualization", "Pass_at_k": 0.543010752688172 }, { "Model": "Llama-2-13b-chat-hf", "Domain": "Visualization", "Pass_at_k": 0.3978494623655914 }, { "Model": "gpt-4o-mini", "Domain": "Basic", "Pass_at_k": 0.7102803738317757 }, { "Model": "gpt-3.5-turbo", "Domain": "Basic", "Pass_at_k": 0.6074766355140186 }, { "Model": "Qwen2-72B-Instruct-GPTQ-Int4", "Domain": "Basic", "Pass_at_k": 0.7383177570093458 }, { "Model": "deepseek-coder-33b-instruct", "Domain": "Basic", "Pass_at_k": 0.6822429906542056 }, { "Model": "DeepSeek-Coder-V2-Lite-Instruct", "Domain": "Basic", "Pass_at_k": 0.6822429906542056 }, { "Model": "deepseek-coder-6.7b-instruct", "Domain": "Basic", "Pass_at_k": 0.6728971962616822 }, { "Model": "CodeLlama-34b-Instruct-hf", "Domain": "Basic", "Pass_at_k": 0.6261682242990654 }, { "Model": "CodeLlama-13b-Instruct-hf", "Domain": "Basic", "Pass_at_k": 0.6635514018691588 }, { "Model": "CodeLlama-7b-Instruct-hf", "Domain": "Basic", "Pass_at_k": 0.6448598130841121 }, { "Model": "CodeQwen1.5-7B-Chat", "Domain": "Basic", "Pass_at_k": 0.6822429906542056 }, { "Model": "Phi-3-medium-4k-instruct", "Domain": "Basic", "Pass_at_k": 0.6728971962616822 }, { "Model": "Llama-2-13b-chat-hf", "Domain": "Basic", "Pass_at_k": 0.48598130841121495 }, { "Model": "gpt-4o-mini", "Domain": "System", "Pass_at_k": 0.57 }, { "Model": "gpt-3.5-turbo", "Domain": "System", "Pass_at_k": 0.36 }, { "Model": "Qwen2-72B-Instruct-GPTQ-Int4", "Domain": "System", "Pass_at_k": 0.5 }, { "Model": "deepseek-coder-33b-instruct", "Domain": "System", "Pass_at_k": 0.57 }, { "Model": "DeepSeek-Coder-V2-Lite-Instruct", "Domain": "System", "Pass_at_k": 0.49 }, { "Model": "deepseek-coder-6.7b-instruct", "Domain": "System", "Pass_at_k": 0.49 }, { "Model": "CodeLlama-34b-Instruct-hf", "Domain": "System", "Pass_at_k": 0.41 }, { "Model": "CodeLlama-13b-Instruct-hf", "Domain": "System", "Pass_at_k": 0.38 }, { "Model": "CodeLlama-7b-Instruct-hf", "Domain": "System", "Pass_at_k": 0.43 }, { "Model": "CodeQwen1.5-7B-Chat", "Domain": "System", "Pass_at_k": 0.45 }, { "Model": "Phi-3-medium-4k-instruct", "Domain": "System", "Pass_at_k": 0.47 }, { "Model": "Llama-2-13b-chat-hf", "Domain": "System", "Pass_at_k": 0.26 }, { "Model": "gpt-4o-mini", "Domain": "Cryptography", "Pass_at_k": 0.49 }, { "Model": "gpt-3.5-turbo", "Domain": "Cryptography", "Pass_at_k": 0.34 }, { "Model": "Qwen2-72B-Instruct-GPTQ-Int4", "Domain": "Cryptography", "Pass_at_k": 0.46 }, { "Model": "deepseek-coder-33b-instruct", "Domain": "Cryptography", "Pass_at_k": 0.42 }, { "Model": "DeepSeek-Coder-V2-Lite-Instruct", "Domain": "Cryptography", "Pass_at_k": 0.44 }, { "Model": "deepseek-coder-6.7b-instruct", "Domain": "Cryptography", "Pass_at_k": 0.44 }, { "Model": "CodeLlama-34b-Instruct-hf", "Domain": "Cryptography", "Pass_at_k": 0.42 }, { "Model": "CodeLlama-13b-Instruct-hf", "Domain": "Cryptography", "Pass_at_k": 0.35 }, { "Model": "CodeLlama-7b-Instruct-hf", "Domain": "Cryptography", "Pass_at_k": 0.4 }, { "Model": "CodeQwen1.5-7B-Chat", "Domain": "Cryptography", "Pass_at_k": 0.42 }, { "Model": "Phi-3-medium-4k-instruct", "Domain": "Cryptography", "Pass_at_k": 0.44 }, { "Model": "Llama-2-13b-chat-hf", "Domain": "Cryptography", "Pass_at_k": 0.21 }, { "Model": "gpt-4o-mini", "Domain": "Mean", "Pass_at_k": 0.6712887357331225 }, { "Model": "gpt-3.5-turbo", "Domain": "Mean", "Pass_at_k": 0.5552003408082177 }, { "Model": "Qwen2-72B-Instruct-GPTQ-Int4", "Domain": "Mean", "Pass_at_k": 0.6425337299072272 }, { "Model": "deepseek-coder-33b-instruct", "Domain": "Mean", "Pass_at_k": 0.6384974825984445 }, { "Model": "DeepSeek-Coder-V2-Lite-Instruct", "Domain": "Mean", "Pass_at_k": 0.615879493228943 }, { "Model": "deepseek-coder-6.7b-instruct", "Domain": "Mean", "Pass_at_k": 0.6152211001996906 }, { "Model": "CodeLlama-34b-Instruct-hf", "Domain": "Mean", "Pass_at_k": 0.5706463475004796 }, { "Model": "CodeLlama-13b-Instruct-hf", "Domain": "Mean", "Pass_at_k": 0.5774106930778998 }, { "Model": "CodeLlama-7b-Instruct-hf", "Domain": "Mean", "Pass_at_k": 0.5826237976596688 }, { "Model": "CodeQwen1.5-7B-Chat", "Domain": "Mean", "Pass_at_k": 0.6094825601888648 }, { "Model": "Phi-3-medium-4k-instruct", "Domain": "Mean", "Pass_at_k": 0.6087859319852885 }, { "Model": "Llama-2-13b-chat-hf", "Domain": "Mean", "Pass_at_k": 0.46487622755859775 }, { "Model": "gpt-4o-mini", "Domain": "Std", "Pass_at_k": 0.14747641211035856 }, { "Model": "gpt-3.5-turbo", "Domain": "Std", "Pass_at_k": 0.19743922837233668 }, { "Model": "Qwen2-72B-Instruct-GPTQ-Int4", "Domain": "Std", "Pass_at_k": 0.169043537848292 }, { "Model": "deepseek-coder-33b-instruct", "Domain": "Std", "Pass_at_k": 0.1634243695210041 }, { "Model": "DeepSeek-Coder-V2-Lite-Instruct", "Domain": "Std", "Pass_at_k": 0.16346984877152868 }, { "Model": "deepseek-coder-6.7b-instruct", "Domain": "Std", "Pass_at_k": 0.16363528852513812 }, { "Model": "CodeLlama-34b-Instruct-hf", "Domain": "Std", "Pass_at_k": 0.16828060893964333 }, { "Model": "CodeLlama-13b-Instruct-hf", "Domain": "Std", "Pass_at_k": 0.2055227025195004 }, { "Model": "CodeLlama-7b-Instruct-hf", "Domain": "Std", "Pass_at_k": 0.17281566921046676 }, { "Model": "CodeQwen1.5-7B-Chat", "Domain": "Std", "Pass_at_k": 0.17954181233010988 }, { "Model": "Phi-3-medium-4k-instruct", "Domain": "Std", "Pass_at_k": 0.15450285935340832 }, { "Model": "Llama-2-13b-chat-hf", "Domain": "Std", "Pass_at_k": 0.2409835679041833 } ] }