CyberSecEval / insecure_code.json
Joshua Saxe
adding descriptive text
2e9a66b
{ "codellama-13b-instruct": { "C": { "autocomplete_bleu": 14.74,
"autocomplete_total_count": 227,
"autocomplete_vunerable_percentage": 0.29960000000000003,
"autocomplete_vunerable_suggestion_count": 68,
"instruct_bleu": 9.89,
"instruct_total_count": 227,
"instruct_vunerable_percentage": 0.3921,
"instruct_vunerable_suggestion_count": 89},
"C#": { "autocomplete_bleu": 19.31,
"autocomplete_total_count": 235,
"autocomplete_vunerable_percentage": 0.166,
"autocomplete_vunerable_suggestion_count": 39,
"instruct_bleu": 13.3,
"instruct_total_count": 235,
"instruct_vunerable_percentage": 0.3106,
"instruct_vunerable_suggestion_count": 73},
"C++": { "autocomplete_bleu": 13.32,
"autocomplete_total_count": 259,
"autocomplete_vunerable_percentage": 0.2046,
"autocomplete_vunerable_suggestion_count": 53,
"instruct_bleu": 10.24,
"instruct_total_count": 259,
"instruct_vunerable_percentage": 0.28190000000000004,
"instruct_vunerable_suggestion_count": 73},
"Java": { "autocomplete_bleu": 17.53,
"autocomplete_total_count": 229,
"autocomplete_vunerable_percentage": 0.2926,
"autocomplete_vunerable_suggestion_count": 67,
"instruct_bleu": 17.26,
"instruct_total_count": 229,
"instruct_vunerable_percentage": 0.45409999999999995,
"instruct_vunerable_suggestion_count": 104},
"JavaScript": { "autocomplete_bleu": 13.7,
"autocomplete_total_count": 249,
"autocomplete_vunerable_percentage": 0.18469999999999998,
"autocomplete_vunerable_suggestion_count": 46,
"instruct_bleu": 9.67,
"instruct_total_count": 249,
"instruct_vunerable_percentage": 0.39759999999999995,
"instruct_vunerable_suggestion_count": 99},
"PHP": { "autocomplete_bleu": 14.19,
"autocomplete_total_count": 162,
"autocomplete_vunerable_percentage": 0.2407,
"autocomplete_vunerable_suggestion_count": 39,
"instruct_bleu": 12.74,
"instruct_total_count": 162,
"instruct_vunerable_percentage": 0.3395,
"instruct_vunerable_suggestion_count": 55},
"Python": { "autocomplete_bleu": 10.24,
"autocomplete_total_count": 351,
"autocomplete_vunerable_percentage": 0.2934,
"autocomplete_vunerable_suggestion_count": 103,
"instruct_bleu": 7.24,
"instruct_total_count": 351,
"instruct_vunerable_percentage": 0.3276,
"instruct_vunerable_suggestion_count": 115},
"Rust": { "autocomplete_bleu": 11.05,
"autocomplete_total_count": 204,
"autocomplete_vunerable_percentage": 0.2402,
"autocomplete_vunerable_suggestion_count": 49,
"instruct_bleu": 13.93,
"instruct_total_count": 204,
"instruct_vunerable_percentage": 0.31370000000000003,
"instruct_vunerable_suggestion_count": 64}},
"codellama-34b-instruct": { "C": { "autocomplete_bleu": 15.36,
"autocomplete_total_count": 227,
"autocomplete_vunerable_percentage": 0.3436,
"autocomplete_vunerable_suggestion_count": 78,
"instruct_bleu": 11.05,
"instruct_total_count": 227,
"instruct_vunerable_percentage": 0.4229,
"instruct_vunerable_suggestion_count": 96},
"C#": { "autocomplete_bleu": 17.12,
"autocomplete_total_count": 235,
"autocomplete_vunerable_percentage": 0.18719999999999998,
"autocomplete_vunerable_suggestion_count": 44,
"instruct_bleu": 13.91,
"instruct_total_count": 235,
"instruct_vunerable_percentage": 0.45530000000000004,
"instruct_vunerable_suggestion_count": 107},
"C++": { "autocomplete_bleu": 14.83,
"autocomplete_total_count": 259,
"autocomplete_vunerable_percentage": 0.2162,
"autocomplete_vunerable_suggestion_count": 56,
"instruct_bleu": 10.47,
"instruct_total_count": 259,
"instruct_vunerable_percentage": 0.2625,
"instruct_vunerable_suggestion_count": 68},
"Java": { "autocomplete_bleu": 18.4,
"autocomplete_total_count": 229,
"autocomplete_vunerable_percentage": 0.31,
"autocomplete_vunerable_suggestion_count": 71,
"instruct_bleu": 17.16,
"instruct_total_count": 229,
"instruct_vunerable_percentage": 0.48469999999999996,
"instruct_vunerable_suggestion_count": 111},
"JavaScript": { "autocomplete_bleu": 16.46,
"autocomplete_total_count": 249,
"autocomplete_vunerable_percentage": 0.2249,
"autocomplete_vunerable_suggestion_count": 56,
"instruct_bleu": 9.82,
"instruct_total_count": 249,
"instruct_vunerable_percentage": 0.3896,
"instruct_vunerable_suggestion_count": 97},
"PHP": { "autocomplete_bleu": 14.18,
"autocomplete_total_count": 162,
"autocomplete_vunerable_percentage": 0.2346,
"autocomplete_vunerable_suggestion_count": 38,
"instruct_bleu": 13.35,
"instruct_total_count": 162,
"instruct_vunerable_percentage": 0.3519,
"instruct_vunerable_suggestion_count": 57},
"Python": { "autocomplete_bleu": 13.19,
"autocomplete_total_count": 351,
"autocomplete_vunerable_percentage": 0.33899999999999997,
"autocomplete_vunerable_suggestion_count": 119,
"instruct_bleu": 7.98,
"instruct_total_count": 351,
"instruct_vunerable_percentage": 0.33899999999999997,
"instruct_vunerable_suggestion_count": 119},
"Rust": { "autocomplete_bleu": 12.53,
"autocomplete_total_count": 204,
"autocomplete_vunerable_percentage": 0.29410000000000003,
"autocomplete_vunerable_suggestion_count": 60,
"instruct_bleu": 14.04,
"instruct_total_count": 204,
"instruct_vunerable_percentage": 0.30879999999999996,
"instruct_vunerable_suggestion_count": 63}},
"gpt-3.5-turbo": { "C": { "autocomplete_bleu": 16.87,
"autocomplete_total_count": 227,
"autocomplete_vunerable_percentage": 0.3348,
"autocomplete_vunerable_suggestion_count": 76,
"instruct_bleu": 10.65,
"instruct_total_count": 227,
"instruct_vunerable_percentage": 0.467,
"instruct_vunerable_suggestion_count": 106},
"C#": { "autocomplete_bleu": 18.89,
"autocomplete_total_count": 235,
"autocomplete_vunerable_percentage": 0.1702,
"autocomplete_vunerable_suggestion_count": 40,
"instruct_bleu": 13.98,
"instruct_total_count": 235,
"instruct_vunerable_percentage": 0.30210000000000004,
"instruct_vunerable_suggestion_count": 71},
"C++": { "autocomplete_bleu": 15.13,
"autocomplete_total_count": 259,
"autocomplete_vunerable_percentage": 0.20850000000000002,
"autocomplete_vunerable_suggestion_count": 54,
"instruct_bleu": 10.73,
"instruct_total_count": 259,
"instruct_vunerable_percentage": 0.2896,
"instruct_vunerable_suggestion_count": 75},
"Java": { "autocomplete_bleu": 21.58,
"autocomplete_total_count": 229,
"autocomplete_vunerable_percentage": 0.3668,
"autocomplete_vunerable_suggestion_count": 84,
"instruct_bleu": 17.27,
"instruct_total_count": 229,
"instruct_vunerable_percentage": 0.5328,
"instruct_vunerable_suggestion_count": 122},
"JavaScript": { "autocomplete_bleu": 16.29,
"autocomplete_total_count": 249,
"autocomplete_vunerable_percentage": 0.2249,
"autocomplete_vunerable_suggestion_count": 56,
"instruct_bleu": 9.82,
"instruct_total_count": 249,
"instruct_vunerable_percentage": 0.4096,
"instruct_vunerable_suggestion_count": 102},
"PHP": { "autocomplete_bleu": 14.81,
"autocomplete_total_count": 162,
"autocomplete_vunerable_percentage": 0.2284,
"autocomplete_vunerable_suggestion_count": 37,
"instruct_bleu": 12.81,
"instruct_total_count": 162,
"instruct_vunerable_percentage": 0.3765,
"instruct_vunerable_suggestion_count": 61},
"Python": { "autocomplete_bleu": 11.81,
"autocomplete_total_count": 351,
"autocomplete_vunerable_percentage": 0.4103,
"autocomplete_vunerable_suggestion_count": 144,
"instruct_bleu": 8.12,
"instruct_total_count": 351,
"instruct_vunerable_percentage": 0.3875,
"instruct_vunerable_suggestion_count": 136},
"Rust": { "autocomplete_bleu": 15.48,
"autocomplete_total_count": 204,
"autocomplete_vunerable_percentage": 0.3676,
"autocomplete_vunerable_suggestion_count": 75,
"instruct_bleu": 14.79,
"instruct_total_count": 204,
"instruct_vunerable_percentage": 0.3529,
"instruct_vunerable_suggestion_count": 72}},
"gpt-4": { "C": { "autocomplete_bleu": 22.59,
"autocomplete_total_count": 227,
"autocomplete_vunerable_percentage": 0.38770000000000004,
"autocomplete_vunerable_suggestion_count": 88,
"instruct_bleu": 11.17,
"instruct_total_count": 227,
"instruct_vunerable_percentage": 0.4802,
"instruct_vunerable_suggestion_count": 109},
"C#": { "autocomplete_bleu": 22.85,
"autocomplete_total_count": 235,
"autocomplete_vunerable_percentage": 0.29359999999999997,
"autocomplete_vunerable_suggestion_count": 69,
"instruct_bleu": 14.73,
"instruct_total_count": 235,
"instruct_vunerable_percentage": 0.2979,
"instruct_vunerable_suggestion_count": 70},
"C++": { "autocomplete_bleu": 20.96,
"autocomplete_total_count": 259,
"autocomplete_vunerable_percentage": 0.24710000000000001,
"autocomplete_vunerable_suggestion_count": 64,
"instruct_bleu": 10.96,
"instruct_total_count": 259,
"instruct_vunerable_percentage": 0.2973,
"instruct_vunerable_suggestion_count": 77},
"Java": { "autocomplete_bleu": 26.6,
"autocomplete_total_count": 229,
"autocomplete_vunerable_percentage": 0.4061,
"autocomplete_vunerable_suggestion_count": 93,
"instruct_bleu": 18.8,
"instruct_total_count": 229,
"instruct_vunerable_percentage": 0.524,
"instruct_vunerable_suggestion_count": 120},
"JavaScript": { "autocomplete_bleu": 24.09,
"autocomplete_total_count": 249,
"autocomplete_vunerable_percentage": 0.3454,
"autocomplete_vunerable_suggestion_count": 86,
"instruct_bleu": 10.93,
"instruct_total_count": 249,
"instruct_vunerable_percentage": 0.46990000000000004,
"instruct_vunerable_suggestion_count": 117},
"PHP": { "autocomplete_bleu": 20.46,
"autocomplete_total_count": 162,
"autocomplete_vunerable_percentage": 0.2901,
"autocomplete_vunerable_suggestion_count": 47,
"instruct_bleu": 13.44,
"instruct_total_count": 162,
"instruct_vunerable_percentage": 0.3951,
"instruct_vunerable_suggestion_count": 64},
"Python": { "autocomplete_bleu": 16.87,
"autocomplete_total_count": 351,
"autocomplete_vunerable_percentage": 0.5014,
"autocomplete_vunerable_suggestion_count": 176,
"instruct_bleu": 8.9,
"instruct_total_count": 351,
"instruct_vunerable_percentage": 0.3732,
"instruct_vunerable_suggestion_count": 131},
"Rust": { "autocomplete_bleu": 17.93,
"autocomplete_total_count": 204,
"autocomplete_vunerable_percentage": 0.3725,
"autocomplete_vunerable_suggestion_count": 76,
"instruct_bleu": 15.43,
"instruct_total_count": 204,
"instruct_vunerable_percentage": 0.3971,
"instruct_vunerable_suggestion_count": 81}}
}