huseinzol05 committed on
Commit
9301ba4
•
1 Parent(s): eb35e0a
Files changed (1) hide show
  1. app.py +98 -89
app.py CHANGED
@@ -21,17 +21,6 @@ INTRODUCTION_TEXT = """
21
  - This test is for programming language understanding.
22
  """
23
 
24
- not_verified = [
25
- {
26
- 'model': 'Antrophic Claude 2',
27
- 'Tatabahasa 0-shot': 61,
28
- 'Tatabahasa 3-shots': 57.8,
29
- },
30
- {
31
- 'model': 'Antrophic Claude 1',
32
- 'Tatabahasa 3-shots': 67,
33
- },
34
- ]
35
  close_source = [
36
  {
37
  'model': 'gpt-4-1106-preview',
@@ -51,85 +40,105 @@ close_source = [
51
  'Tatabahasa 1-shot': 60.80691642651297,
52
  'Tatabahasa 3-shots': 63.03724928366762,
53
  },
 
 
 
 
 
 
 
 
 
54
  ]
55
 
56
  open_source = [
57
- {'model': '[llama2-7b](https://huggingface.co/meta-llama/Llama-2-7b-hf)',
58
- 'Tatabahasa 0-shot': 24.355300859598856,
59
- 'Tatabahasa 1-shot': 28.08022922636103,
60
- 'Tatabahasa 3-shots': 24.641833810888254,
61
- },
62
- {'model': '[malaysian-llama2-7b-32k](https://huggingface.co/mesolitica/llama-7b-hf-32768-fpf)',
63
- 'BM-PT3 0-shot': 20.37037037037037,
64
- 'BM-PT3 1-shot': 20.37037037037037,
65
- 'BM-PT3 3-shots': 29.629629629629626,
66
- 'Tatabahasa 0-shot': 17.765042979942695,
67
- 'Tatabahasa 1-shot': 24.068767908309454,
68
- 'Tatabahasa 3-shots': 27.507163323782237,
69
- },
70
- {'model': '[malaysian-llama2-7b-32k-instructions](https://huggingface.co/mesolitica/malaysian-llama2-7b-32k-instructions)',
71
- 'BM-PT3 0-shot': 35.294117647058826,
72
- 'BM-PT3 1-shot': 21.153846153846153,
73
- 'BM-PT3 3-shots': 28.30188679245283,
74
- },
75
- {'model': '[malaysian-llama2-13b-32k](https://huggingface.co/mesolitica/llama-13b-hf-32768-fpf)',
76
- 'BM-PT3 0-shot': 33.33333333333333,
77
- 'BM-PT3 1-shot': 20.37037037037037,
78
- 'BM-PT3 3-shots': 31.48148148148148,
79
- 'Tatabahasa 0-shot': 26.07449856733524,
80
- 'Tatabahasa 1-shot': 25.214899713467048,
81
- 'Tatabahasa 3-shots': 24.355300859598856,
82
- },
83
- {'model': '[malaysian-llama2-13b-32k-instructions](https://huggingface.co/mesolitica/malaysian-llama2-13b-32k-instructions)',
84
- 'BM-PT3 0-shot': 28.57142857142857,
85
- 'BM-PT3 1-shot': 12.244897959183673,
86
- 'BM-PT3 3-shots': 17.307692307692307,
87
- },
88
- {'model': '[mistral-7b](https://huggingface.co/mistralai/Mistral-7B-v0.1)',
89
- 'Tatabahasa 0-shot': 28.939828080229223,
90
- 'Tatabahasa 1-shot': 34.38395415472779,
91
- 'Tatabahasa 3-shots': 32.95128939828081,
92
- },
93
- {'model': '[malaysian-mistral-7b-4k](https://huggingface.co/mesolitica/mistral-7b-4096-fpf)',
94
- 'BM-PT3 0-shot': 20.37037037037037,
95
- 'BM-PT3 1-shot': 22.22222222222222,
96
- 'BM-PT3 3-shots': 33.33333333333333,
97
- 'Tatabahasa 0-shot': 21.48997134670487,
98
- 'Tatabahasa 1-shot': 28.939828080229223,
99
- 'Tatabahasa 3-shots': 24.641833810888254,
100
- },
101
- {'model': '[malaysian-mistral-7b-32k](https://huggingface.co/mesolitica/mistral-7b-32768-fpf)',
102
- 'BM-PT3 0-shot': 16.666666666666664,
103
- 'BM-PT3 1-shot': 16.666666666666664,
104
- 'BM-PT3 3-shots': 25.925925925925924,
105
- 'Tatabahasa 0-shot': 18.624641833810887,
106
- 'Tatabahasa 1-shot': 24.355300859598856,
107
- 'Tatabahasa 3-shots': 28.653295128939828,
108
- },
109
- {'model': '[malaysian-mistral-7b-32k-instructions](https://huggingface.co/mesolitica/malaysian-mistral-7b-32k-instructions)',
110
- 'BM-PT3 0-shot': 35.18518518518518,
111
- 'BM-PT3 1-shot': 33.33333333333333,
112
- 'BM-PT3 3-shots': 37.03703703703704,
113
- 'Tatabahasa 0-shot': 55.014326647564474,
114
- 'Tatabahasa 1-shot': 42.693409742120345,
115
- 'Tatabahasa 3-shots': 33.33333333333333,
116
- },
117
- {'model': '[aisingapore/sealion3b](https://huggingface.co/aisingapore/sealion3b)',
118
- 'BM-PT3 0-shot': 20.37037037037037,
119
- 'BM-PT3 1-shot': 25.925925925925924,
120
- 'BM-PT3 3-shots': 31.48148148148148,
121
- 'Tatabahasa 0-shot': 21.776504297994272,
122
- 'Tatabahasa 1-shot': 21.776504297994272,
123
- 'Tatabahasa 3-shots': 24.641833810888254,
124
- },
125
- {'model': '[aisingapore/sealion7b](https://huggingface.co/aisingapore/sealion7b)',
126
- 'BM-PT3 0-shot': 20.37037037037037,
127
- 'BM-PT3 1-shot': 24.074074074074073,
128
- 'BM-PT3 3-shots': 33.33333333333333,
129
- 'Tatabahasa 0-shot': 25.787965616045845,
130
- 'Tatabahasa 1-shot': 27.507163323782237,
131
- 'Tatabahasa 3-shots': 26.07449856733524,
132
- }
 
 
 
 
 
 
 
 
 
 
 
133
  ]
134
 
135
  data = pd.DataFrame(close_source + open_source)
@@ -138,6 +147,6 @@ demo = gr.Blocks(css=custom_css)
138
  with demo:
139
  gr.HTML(TITLE)
140
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
141
- gr.DataFrame(data, datatype='markdown')
142
 
143
- demo.launch(server_name='0.0.0.0')
 
21
  - This test is for programming language understanding.
22
  """
23
 
 
 
 
 
 
 
 
 
 
 
 
24
  close_source = [
25
  {
26
  'model': 'gpt-4-1106-preview',
 
40
  'Tatabahasa 1-shot': 60.80691642651297,
41
  'Tatabahasa 3-shots': 63.03724928366762,
42
  },
43
+ {
44
+ 'model': 'Antrophic Claude 2',
45
+ 'Tatabahasa 0-shot': 61,
46
+ 'Tatabahasa 3-shots': 57.8,
47
+ },
48
+ {
49
+ 'model': 'Antrophic Claude 1',
50
+ 'Tatabahasa 3-shots': 67,
51
+ },
52
  ]
53
 
54
  open_source = [
55
+ {
56
+ 'model': '[llama2-7b](https://huggingface.co/meta-llama/Llama-2-7b-hf)',
57
+ 'Tatabahasa 0-shot': 24.355300859598856,
58
+ 'Tatabahasa 1-shot': 28.08022922636103,
59
+ 'Tatabahasa 3-shots': 24.641833810888254,
60
+ },
61
+ {
62
+ 'model': '[malaysian-llama2-7b-32k](https://huggingface.co/mesolitica/llama-7b-hf-32768-fpf)',
63
+ 'BM-PT3 0-shot': 20.37037037037037,
64
+ 'BM-PT3 1-shot': 20.37037037037037,
65
+ 'BM-PT3 3-shots': 29.629629629629626,
66
+ 'Tatabahasa 0-shot': 17.765042979942695,
67
+ 'Tatabahasa 1-shot': 24.068767908309454,
68
+ 'Tatabahasa 3-shots': 27.507163323782237,
69
+ },
70
+ {
71
+ 'model': '[malaysian-llama2-7b-32k-instructions](https://huggingface.co/mesolitica/malaysian-llama2-7b-32k-instructions)',
72
+ 'BM-PT3 0-shot': 35.294117647058826,
73
+ 'BM-PT3 1-shot': 21.153846153846153,
74
+ 'BM-PT3 3-shots': 28.30188679245283,
75
+ },
76
+ {
77
+ 'model': '[malaysian-llama2-13b-32k](https://huggingface.co/mesolitica/llama-13b-hf-32768-fpf)',
78
+ 'BM-PT3 0-shot': 33.33333333333333,
79
+ 'BM-PT3 1-shot': 20.37037037037037,
80
+ 'BM-PT3 3-shots': 31.48148148148148,
81
+ 'Tatabahasa 0-shot': 26.07449856733524,
82
+ 'Tatabahasa 1-shot': 25.214899713467048,
83
+ 'Tatabahasa 3-shots': 24.355300859598856,
84
+ },
85
+ {
86
+ 'model': '[malaysian-llama2-13b-32k-instructions](https://huggingface.co/mesolitica/malaysian-llama2-13b-32k-instructions)',
87
+ 'BM-PT3 0-shot': 28.57142857142857,
88
+ 'BM-PT3 1-shot': 12.244897959183673,
89
+ 'BM-PT3 3-shots': 17.307692307692307,
90
+ },
91
+ {
92
+ 'model': '[mistral-7b](https://huggingface.co/mistralai/Mistral-7B-v0.1)',
93
+ 'Tatabahasa 0-shot': 28.939828080229223,
94
+ 'Tatabahasa 1-shot': 34.38395415472779,
95
+ 'Tatabahasa 3-shots': 32.95128939828081,
96
+ },
97
+ {
98
+ 'model': '[malaysian-mistral-7b-4k](https://huggingface.co/mesolitica/mistral-7b-4096-fpf)',
99
+ 'BM-PT3 0-shot': 20.37037037037037,
100
+ 'BM-PT3 1-shot': 22.22222222222222,
101
+ 'BM-PT3 3-shots': 33.33333333333333,
102
+ 'Tatabahasa 0-shot': 21.48997134670487,
103
+ 'Tatabahasa 1-shot': 28.939828080229223,
104
+ 'Tatabahasa 3-shots': 24.641833810888254,
105
+ },
106
+ {
107
+ 'model': '[malaysian-mistral-7b-32k](https://huggingface.co/mesolitica/mistral-7b-32768-fpf)',
108
+ 'BM-PT3 0-shot': 16.666666666666664,
109
+ 'BM-PT3 1-shot': 16.666666666666664,
110
+ 'BM-PT3 3-shots': 25.925925925925924,
111
+ 'Tatabahasa 0-shot': 18.624641833810887,
112
+ 'Tatabahasa 1-shot': 24.355300859598856,
113
+ 'Tatabahasa 3-shots': 28.653295128939828,
114
+ },
115
+ {
116
+ 'model': '[malaysian-mistral-7b-32k-instructions](https://huggingface.co/mesolitica/malaysian-mistral-7b-32k-instructions)',
117
+ 'BM-PT3 0-shot': 35.18518518518518,
118
+ 'BM-PT3 1-shot': 33.33333333333333,
119
+ 'BM-PT3 3-shots': 37.03703703703704,
120
+ 'Tatabahasa 0-shot': 55.014326647564474,
121
+ 'Tatabahasa 1-shot': 42.693409742120345,
122
+ 'Tatabahasa 3-shots': 33.33333333333333,
123
+ },
124
+ {
125
+ 'model': '[aisingapore/sealion3b](https://huggingface.co/aisingapore/sealion3b)',
126
+ 'BM-PT3 0-shot': 20.37037037037037,
127
+ 'BM-PT3 1-shot': 25.925925925925924,
128
+ 'BM-PT3 3-shots': 31.48148148148148,
129
+ 'Tatabahasa 0-shot': 21.776504297994272,
130
+ 'Tatabahasa 1-shot': 21.776504297994272,
131
+ 'Tatabahasa 3-shots': 24.641833810888254,
132
+ },
133
+ {
134
+ 'model': '[aisingapore/sealion7b](https://huggingface.co/aisingapore/sealion7b)',
135
+ 'BM-PT3 0-shot': 20.37037037037037,
136
+ 'BM-PT3 1-shot': 24.074074074074073,
137
+ 'BM-PT3 3-shots': 33.33333333333333,
138
+ 'Tatabahasa 0-shot': 25.787965616045845,
139
+ 'Tatabahasa 1-shot': 27.507163323782237,
140
+ 'Tatabahasa 3-shots': 26.07449856733524,
141
+ }
142
  ]
143
 
144
  data = pd.DataFrame(close_source + open_source)
 
147
  with demo:
148
  gr.HTML(TITLE)
149
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
150
+ gr.DataFrame(data, datatype = 'markdown')
151
 
152
+ demo.launch()