Update README.md
Browse files
README.md
CHANGED
@@ -39,6 +39,8 @@ Total 153,013 samples.
## HumanEval

| human-eval | pass@1 |
|
@@ -62,6 +64,17 @@ Total 153,013 samples.
| CodeLlama-13B-Python| 42.89|
| CodeLlama-13B| 35.07|

## lm-evaluation-harness
+
+
## HumanEval

| human-eval | pass@1 |
| CodeLlama-13B-Python| 42.89|
| CodeLlama-13B| 35.07|

+ ## NL2SQL
+
+ SQL-EVAL: 125/175 (71.43%)
+
+ Average rate of exact match: 67.43%
+
+ Average correct rate: 71.43%
+
+ - GPT4: 130/175 (74.29%)
+ - GPT3-Turbo-0613: 105/175 (60.00%)
+

## lm-evaluation-harness