huseinzol05 committed on
Commit e016664 β€’ 1 Parent(s): 7489aa6

add initial

Files changed (2)
  1. README.md +1 -1
  2. app.py +158 -0
README.md CHANGED
@@ -1,6 +1,6 @@
  ---
  title: Malaysian Embedding Leaderboard
- emoji: ⚑
+ emoji: πŸ†πŸ‡²πŸ‡ΎπŸ“‹
  colorFrom: green
  colorTo: yellow
  sdk: gradio
app.py ADDED
@@ -0,0 +1,158 @@
+ # import gradio as gr
+ # import pandas as pd
+ # from css_html_js import custom_css
+
+ # TITLE = """<h1 align="center" id="space-title">πŸ‡²πŸ‡Ύ Malay LLM Leaderboard</h1>"""
+
+ # INTRODUCTION_TEXT = """
+ # πŸ“ The πŸ‡²πŸ‡Ύ Malay LLM Leaderboard aims to track, rank and evaluate open LLMs on Malay tasks. All notebooks are at https://github.com/mesolitica/llm-benchmarks; feel free to submit your own score at https://huggingface.co/spaces/mesolitica/malay-llm-leaderboard/discussions with a link to the notebook.
+
+ # ## Dataset
+
+ # πŸ“ˆ We evaluate models on 4 datasets:
+
+ # 1. BM-PT3 Paper 1, contains 54 questions, https://github.com/mesolitica/malaysian-dataset/tree/master/llm-benchmark/BM-pt3
+ #     - An exam for 15-year-old Malaysian students, covering reading comprehension and general knowledge in Malay.
+ # 2. Tatabahasa, contains 349 questions, https://github.com/mesolitica/malaysian-dataset/tree/master/llm-benchmark/tatabahasabm.tripod.com
+ #     - A general test of Malay grammar.
+ # 3. Translated IndoNLI to Malay, evaluated on the `test_expert` split, https://huggingface.co/datasets/mesolitica/translated-indonli
+ #     - A general test of language reasoning.
+ # 4. HumanEval, https://github.com/openai/human-eval
+ #     - A test of programming language understanding.
+ # """
+
+ # close_source = [
+ #     {
+ #         'model': 'gpt-4-1106-preview',
+ #         'BM-PT3 0-shot': 51.85185185185185,
+ #         'BM-PT3 1-shot': 66.66666666666666,
+ #         'BM-PT3 3-shots': 55.55555555555556,
+ #         'Tatabahasa 0-shot': 75.64469914040114,
+ #         'Tatabahasa 1-shot': 73.63896848137536,
+ #         'Tatabahasa 3-shots': 75.64469914040114,
+ #     },
+ #     {
+ #         'model': 'gpt-3.5-turbo-0613',
+ #         'BM-PT3 0-shot': 36.53846153846153,
+ #         'BM-PT3 1-shot': 28.846153846153843,
+ #         'BM-PT3 3-shots': 24.528301886792452,
+ #         'Tatabahasa 0-shot': 59.530791788856305,
+ #         'Tatabahasa 1-shot': 60.80691642651297,
+ #         'Tatabahasa 3-shots': 63.03724928366762,
+ #     },
+ #     {
+ #         'model': 'Anthropic Claude 2',
+ #         'Tatabahasa 0-shot': 61,
+ #         'Tatabahasa 3-shots': 57.8,
+ #     },
+ #     {
+ #         'model': 'Anthropic Claude 1',
+ #         'Tatabahasa 3-shots': 67,
+ #     },
+ # ]
+
+ # open_source = [
+ #     {
+ #         'model': '[llama2-7b](https://huggingface.co/meta-llama/Llama-2-7b-hf)',
+ #         'Tatabahasa 0-shot': 24.355300859598856,
+ #         'Tatabahasa 1-shot': 28.08022922636103,
+ #         'Tatabahasa 3-shots': 24.641833810888254,
+ #     },
+ #     {
+ #         'model': '[malaysian-llama2-7b-32k](https://huggingface.co/mesolitica/llama-7b-hf-32768-fpf)',
+ #         'BM-PT3 0-shot': 20.37037037037037,
+ #         'BM-PT3 1-shot': 20.37037037037037,
+ #         'BM-PT3 3-shots': 29.629629629629626,
+ #         'Tatabahasa 0-shot': 17.765042979942695,
+ #         'Tatabahasa 1-shot': 24.068767908309454,
+ #         'Tatabahasa 3-shots': 27.507163323782237,
+ #     },
+ #     {
+ #         'model': '[malaysian-llama2-7b-32k-instructions](https://huggingface.co/mesolitica/malaysian-llama2-7b-32k-instructions)',
+ #         'BM-PT3 0-shot': 35.294117647058826,
+ #         'BM-PT3 1-shot': 21.153846153846153,
+ #         'BM-PT3 3-shots': 28.30188679245283,
+ #     },
+ #     {
+ #         'model': '[malaysian-llama2-13b-32k](https://huggingface.co/mesolitica/llama-13b-hf-32768-fpf)',
+ #         'BM-PT3 0-shot': 33.33333333333333,
+ #         'BM-PT3 1-shot': 20.37037037037037,
+ #         'BM-PT3 3-shots': 31.48148148148148,
+ #         'Tatabahasa 0-shot': 26.07449856733524,
+ #         'Tatabahasa 1-shot': 25.214899713467048,
+ #         'Tatabahasa 3-shots': 24.355300859598856,
+ #     },
+ #     {
+ #         'model': '[malaysian-llama2-13b-32k-instructions](https://huggingface.co/mesolitica/malaysian-llama2-13b-32k-instructions)',
+ #         'BM-PT3 0-shot': 28.57142857142857,
+ #         'BM-PT3 1-shot': 12.244897959183673,
+ #         'BM-PT3 3-shots': 17.307692307692307,
+ #     },
+ #     {
+ #         'model': '[mistral-7b](https://huggingface.co/mistralai/Mistral-7B-v0.1)',
+ #         'Tatabahasa 0-shot': 28.939828080229223,
+ #         'Tatabahasa 1-shot': 34.38395415472779,
+ #         'Tatabahasa 3-shots': 32.95128939828081,
+ #     },
+ #     {
+ #         'model': '[malaysian-mistral-7b-4k](https://huggingface.co/mesolitica/mistral-7b-4096-fpf)',
+ #         'BM-PT3 0-shot': 20.37037037037037,
+ #         'BM-PT3 1-shot': 22.22222222222222,
+ #         'BM-PT3 3-shots': 33.33333333333333,
+ #         'Tatabahasa 0-shot': 21.48997134670487,
+ #         'Tatabahasa 1-shot': 28.939828080229223,
+ #         'Tatabahasa 3-shots': 24.641833810888254,
+ #     },
+ #     {
+ #         'model': '[malaysian-mistral-7b-32k](https://huggingface.co/mesolitica/mistral-7b-32768-fpf)',
+ #         'BM-PT3 0-shot': 16.666666666666664,
+ #         'BM-PT3 1-shot': 16.666666666666664,
+ #         'BM-PT3 3-shots': 25.925925925925924,
+ #         'Tatabahasa 0-shot': 18.624641833810887,
+ #         'Tatabahasa 1-shot': 24.355300859598856,
+ #         'Tatabahasa 3-shots': 28.653295128939828,
+ #     },
+ #     {
+ #         'model': '[malaysian-mistral-7b-32k-instructions](https://huggingface.co/mesolitica/malaysian-mistral-7b-32k-instructions)',
+ #         'BM-PT3 0-shot': 35.18518518518518,
+ #         'BM-PT3 1-shot': 33.33333333333333,
+ #         'BM-PT3 3-shots': 37.03703703703704,
+ #         'Tatabahasa 0-shot': 55.014326647564474,
+ #         'Tatabahasa 1-shot': 42.693409742120345,
+ #         'Tatabahasa 3-shots': 33.33333333333333,
+ #     },
+ #     {
+ #         'model': '[aisingapore/sealion3b](https://huggingface.co/aisingapore/sealion3b)',
+ #         'BM-PT3 0-shot': 20.37037037037037,
+ #         'BM-PT3 1-shot': 25.925925925925924,
+ #         'BM-PT3 3-shots': 31.48148148148148,
+ #         'Tatabahasa 0-shot': 21.776504297994272,
+ #         'Tatabahasa 1-shot': 21.776504297994272,
+ #         'Tatabahasa 3-shots': 24.641833810888254,
+ #     },
+ #     {
+ #         'model': '[aisingapore/sealion7b](https://huggingface.co/aisingapore/sealion7b)',
+ #         'BM-PT3 0-shot': 20.37037037037037,
+ #         'BM-PT3 1-shot': 24.074074074074073,
+ #         'BM-PT3 3-shots': 33.33333333333333,
+ #         'Tatabahasa 0-shot': 25.787965616045845,
+ #         'Tatabahasa 1-shot': 27.507163323782237,
+ #         'Tatabahasa 3-shots': 26.07449856733524,
+ #     }
+ # ]
+
+ # data = pd.DataFrame(close_source + open_source)
+
+ # demo = gr.Blocks(css=custom_css)
+ # with demo:
+ #     gr.HTML(TITLE)
+ #     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+ #     gr.DataFrame(data, datatype='markdown')
+
+ # demo.launch()
+
+ import gradio as gr
+ demo = gr.Blocks()
+ with demo:
+     gr.HTML('helo')
+ demo.launch()
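
The active part of app.py is only a placeholder page; the commented-out block above already sketches the intended leaderboard. A minimal, runnable version of that pattern is shown below, assuming only `gradio` and `pandas` are installed: the `css_html_js.custom_css` import is dropped (that module ships with the Space, not with Gradio) and the score tables are truncated to two illustrative rows.

```python
import gradio as gr
import pandas as pd

# Two illustrative rows copied from the commented-out score tables above;
# keys missing from a row simply become NaN cells in the DataFrame.
close_source = [
    {
        'model': 'gpt-4-1106-preview',
        'BM-PT3 0-shot': 51.85185185185185,
        'Tatabahasa 0-shot': 75.64469914040114,
    },
]
open_source = [
    {
        'model': '[mistral-7b](https://huggingface.co/mistralai/Mistral-7B-v0.1)',
        'Tatabahasa 0-shot': 28.939828080229223,
    },
]

# One row per model, one column per benchmark/shot setting.
data = pd.DataFrame(close_source + open_source)

demo = gr.Blocks()
with demo:
    gr.HTML('<h1 align="center" id="space-title">πŸ‡²πŸ‡Ύ Malay LLM Leaderboard</h1>')
    # datatype='markdown' makes the model column render its markdown links.
    gr.DataFrame(data, datatype='markdown')

demo.launch()
```

Once the real score tables are uncommented, the same few lines inside `gr.Blocks()` are all that is needed to publish them in the Space.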