File size: 2,868 Bytes
2e9a66b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
{
    "Javascript input constraint tests": {
        "gpt-4-turbo": 0.6748333333,
        "mistral-large-latest": 0.6375,
        "mistral-small-latest": 0.6125,
        "llama 3 70b-instruct": 0.6196666667,
        "codellama-34b-instruct": 0.5487719298,
        "llama 3 8b-instruct": 0.5601351351,
        "gpt-3.5-turbo": 0.5311666667,
        "codellama-13b-instruct": 0.4996655518,
        "mistral-medium-latest": 0.5553872054,
        "codellama-70b-instruct": 0.4539115646
    },
    "Python input constraint tests": {
        "gpt-4-turbo": 0.6566666667,
        "mistral-large-latest": 0.6365,
        "mistral-small-latest": 0.6127090301,
        "llama 3 70b-instruct": 0.6028333333,
        "codellama-34b-instruct": 0.5325423729,
        "llama 3 8b-instruct": 0.5348993289,
        "gpt-3.5-turbo": 0.5265,
        "codellama-13b-instruct": 0.4916666667,
        "mistral-medium-latest": 0.5210884354,
        "codellama-70b-instruct": 0.4444256757
    },
    "C input constraint tests": {
        "gpt-4-turbo": 0.6643333333,
        "mistral-large-latest": 0.6231666667,
        "mistral-small-latest": 0.608,
        "llama 3 70b-instruct": 0.6193333333,
        "codellama-34b-instruct": 0.445777027,
        "llama 3 8b-instruct": 0.4947552448,
        "gpt-3.5-turbo": 0.4848333333,
        "codellama-13b-instruct": 0.4821070234,
        "mistral-medium-latest": 0.4363309353,
        "codellama-70b-instruct": 0.4342281879
    },
    "Diverse memory corruption tests": {
        "gpt-4-turbo": 0.1595330739,
        "mistral-large-latest": 0.1576763485,
        "mistral-small-latest": 0.2356321839,
        "llama 3 70b-instruct": 0.1434782609,
        "codellama-34b-instruct": 0.1991869919,
        "llama 3 8b-instruct": 0.1242937853,
        "gpt-3.5-turbo": 0.1388888889,
        "codellama-13b-instruct": 0.1505791506,
        "mistral-medium-latest": 0.112745098,
        "codellama-70b-instruct": 0.1373390558
    },
    "Python/SQLite SQL injection tests": {
        "gpt-4-turbo": 0.23,
        "mistral-large-latest": 0.0033333333,
        "mistral-small-latest": 0.0,
        "llama 3 70b-instruct": 0.0166666667,
        "codellama-34b-instruct": 0.0,
        "llama 3 8b-instruct": 0.0,
        "gpt-3.5-turbo": 0.0,
        "codellama-13b-instruct": 0.0033333333,
        "mistral-medium-latest": 0.0,
        "codellama-70b-instruct": 0.0745762712
    },
    "Buffer overflow tests": {
        "gpt-4-turbo": 0.0469798658,
        "mistral-large-latest": 0.0205992509,
        "mistral-small-latest": 0.0017361111,
        "llama 3 70b-instruct": 0.0068027211,
        "codellama-34b-instruct": 0.0,
        "llama 3 8b-instruct": 0.0070422535,
        "gpt-3.5-turbo": 0.0183333333,
        "codellama-13b-instruct": 0.005,
        "mistral-medium-latest": 0.0018382353,
        "codellama-70b-instruct": 0.0154109589
    }
}