carbonnnnn
committed on
Commit
·
f12cdfe
1
Parent(s):
1580227
update
Browse files- app.py +0 -1
- assets/__pycache__/text_content.cpython-313.pyc +0 -0
- assets/pricing.json +212 -0
- assets/text_content.py +3 -0
- prepare_path.sh +2 -0
- requirements.txt +2 -1
- src/collect_data.py +18 -0
- utils/__pycache__/filter_utils.cpython-313.pyc +0 -0
- utils/__pycache__/text_content.cpython-313.pyc +0 -0
app.py
CHANGED
@@ -195,7 +195,6 @@ with llm_calc_app:
|
|
195 |
elem_id="text-leaderboard-table",
|
196 |
interactive=False,
|
197 |
visible=True,
|
198 |
-
height=800,
|
199 |
datatype=['html', 'number', 'number', 'date', 'number', 'number', 'number', 'number', 'html']
|
200 |
)
|
201 |
|
|
|
195 |
elem_id="text-leaderboard-table",
|
196 |
interactive=False,
|
197 |
visible=True,
|
|
|
198 |
datatype=['html', 'number', 'number', 'date', 'number', 'number', 'number', 'number', 'html']
|
199 |
)
|
200 |
|
assets/__pycache__/text_content.cpython-313.pyc
ADDED
Binary file (424 Bytes). View file
|
|
assets/pricing.json
ADDED
@@ -0,0 +1,212 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"model_id": "gpt-4-1106-vision-preview",
|
4 |
+
"input": "10$",
|
5 |
+
"output": "30$"
|
6 |
+
},
|
7 |
+
{
|
8 |
+
"model_id": "gpt-4o-2024-05-13",
|
9 |
+
"input": "5$",
|
10 |
+
"output": "15$"
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"model_id": "gpt-4o-2024-08-06",
|
14 |
+
"input": "3.750$",
|
15 |
+
"output": "15$"
|
16 |
+
},
|
17 |
+
{
|
18 |
+
"model_id": "gpt-4o-mini-2024-07-18",
|
19 |
+
"input": "0.300$",
|
20 |
+
"output": "1.200$"
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"model_id": "gpt-4-turbo-2024-04-09",
|
24 |
+
"input": "10$",
|
25 |
+
"output": "30$"
|
26 |
+
},
|
27 |
+
{
|
28 |
+
"model_id": "gpt-4-1106-preview",
|
29 |
+
"input": "",
|
30 |
+
"output": ""
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"model_id": "gpt-4-0125-preview",
|
34 |
+
"input": "10$",
|
35 |
+
"output": "30$"
|
36 |
+
},
|
37 |
+
{
|
38 |
+
"model_id": "o1-preview-2024-09-12",
|
39 |
+
"input": "15$",
|
40 |
+
"output": "60$"
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"model_id": "o1-mini-2024-09-12",
|
44 |
+
"input": "3$",
|
45 |
+
"output": "12$"
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"model_id": "gpt-3.5-turbo-0125",
|
49 |
+
"input": "0.5$",
|
50 |
+
"output": "1.5$"
|
51 |
+
},
|
52 |
+
{
|
53 |
+
"model_id": "gpt-4-0613",
|
54 |
+
"input": "",
|
55 |
+
"output": ""
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"model_id": "gpt-4-0314",
|
59 |
+
"input": "",
|
60 |
+
"output": ""
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"model_id": "gpt-3.5-turbo-1106",
|
64 |
+
"input": "1$",
|
65 |
+
"output": "2$"
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"model_id": "gpt-3.5-turbo-0613",
|
69 |
+
"input": "1.5$",
|
70 |
+
"output": "2$"
|
71 |
+
},
|
72 |
+
{
|
73 |
+
"model_id": "command",
|
74 |
+
"input": "",
|
75 |
+
"output": ""
|
76 |
+
},
|
77 |
+
{
|
78 |
+
"model_id": "command-light",
|
79 |
+
"input": "",
|
80 |
+
"output": ""
|
81 |
+
},
|
82 |
+
{
|
83 |
+
"model_id": "claude-v1.3",
|
84 |
+
"input": "",
|
85 |
+
"output": ""
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"model_id": "claude-v1.3-100k",
|
89 |
+
"input": "",
|
90 |
+
"output": ""
|
91 |
+
},
|
92 |
+
{
|
93 |
+
"model_id": "claude-instant-1.2",
|
94 |
+
"input": "",
|
95 |
+
"output": ""
|
96 |
+
},
|
97 |
+
{
|
98 |
+
"model_id": "claude-2",
|
99 |
+
"input": "8$",
|
100 |
+
"output": "24$"
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"model_id": "claude-2.1",
|
104 |
+
"input": "8$",
|
105 |
+
"output": "24$"
|
106 |
+
},
|
107 |
+
{
|
108 |
+
"model_id": "claude-3-opus-20240229",
|
109 |
+
"input": "15$",
|
110 |
+
"output": "75$"
|
111 |
+
},
|
112 |
+
{
|
113 |
+
"model_id": "claude-3-sonnet-20240229",
|
114 |
+
"input": "3$",
|
115 |
+
"output": "15$"
|
116 |
+
},
|
117 |
+
{
|
118 |
+
"model_id": "claude-3-haiku-20240307",
|
119 |
+
"input": "0.25$",
|
120 |
+
"output": "1.25$"
|
121 |
+
},
|
122 |
+
{
|
123 |
+
"model_id": "claude-3-5-sonnet-20240620",
|
124 |
+
"input": "3$",
|
125 |
+
"output": "15$"
|
126 |
+
},
|
127 |
+
{
|
128 |
+
"model_id": "claude-3-5-haiku-20241022",
|
129 |
+
"input": "0.8$",
|
130 |
+
"output": "4$"
|
131 |
+
},
|
132 |
+
{
|
133 |
+
"model_id": "claude-3-5-sonnet-20241022",
|
134 |
+
"input": "3$",
|
135 |
+
"output": "15$"
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"model_id": "gemini-1.0-pro-001",
|
139 |
+
"input": "0.5$",
|
140 |
+
"output": "1.5$"
|
141 |
+
},
|
142 |
+
{
|
143 |
+
"model_id": "gemini-1.0-pro-002",
|
144 |
+
"input": "0.5$",
|
145 |
+
"output": "1.5$"
|
146 |
+
},
|
147 |
+
{
|
148 |
+
"model_id": "gemini-1.0-pro-vision-latest",
|
149 |
+
"input": "0.5$",
|
150 |
+
"output": "1.5$"
|
151 |
+
},
|
152 |
+
{
|
153 |
+
"model_id": "gemini-1.5-flash-001",
|
154 |
+
"input": "0.075$",
|
155 |
+
"output": "0.3$"
|
156 |
+
},
|
157 |
+
{
|
158 |
+
"model_id": "gemini-1.5-pro-001",
|
159 |
+
"input": "1.25$",
|
160 |
+
"output": "5$"
|
161 |
+
},
|
162 |
+
{
|
163 |
+
"model_id": "gemini-1.5-pro-002",
|
164 |
+
"input": "1.25$",
|
165 |
+
"output": "5$"
|
166 |
+
},
|
167 |
+
{
|
168 |
+
"model_id": "gemini-1.5-flash-002",
|
169 |
+
"input": "0.075$",
|
170 |
+
"output": "0.3$"
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"model_id": "gemini-1.5-flash-8b-001",
|
174 |
+
"input": "0.0375$",
|
175 |
+
"output": "0.15$"
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"model_id": "gemini-2.0-flash-exp",
|
179 |
+
"input": "0$",
|
180 |
+
"output": "0$"
|
181 |
+
},
|
182 |
+
{
|
183 |
+
"model_id": "luminous-supreme-control",
|
184 |
+
"input": "",
|
185 |
+
"output": ""
|
186 |
+
},
|
187 |
+
{
|
188 |
+
"model_id": "luminous-supreme",
|
189 |
+
"input": "",
|
190 |
+
"output": ""
|
191 |
+
},
|
192 |
+
{
|
193 |
+
"model_id": "luminous-extended",
|
194 |
+
"input": "",
|
195 |
+
"output": ""
|
196 |
+
},
|
197 |
+
{
|
198 |
+
"model_id": "luminous-base",
|
199 |
+
"input": "",
|
200 |
+
"output": ""
|
201 |
+
},
|
202 |
+
{
|
203 |
+
"model_id": "luminous-base",
|
204 |
+
"input": "",
|
205 |
+
"output": ""
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"model_id": "luminous-base",
|
209 |
+
"input": "",
|
210 |
+
"output": ""
|
211 |
+
}
|
212 |
+
]
|
assets/text_content.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
CLEMBENCH_RUNS_REPO = "https://raw.githubusercontent.com/clembench/clembench-runs/main/"
|
2 |
+
REGISTRY_URL = "https://raw.githubusercontent.com/clp-research/clembench/refs/heads/refactor_model_registry/backends/model_registry.json"
|
3 |
+
BENCHMARK_FILE = "benchmark_runs.json"
|
prepare_path.sh
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
export PYTHONPATH=.:$PYTHONPATH
|
requirements.txt
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
beautifulsoup4==4.12.3
|
2 |
pandas==2.2.3
|
3 |
gradio_rangeslider==0.0.7
|
4 |
-
gradio==
|
|
|
|
1 |
beautifulsoup4==4.12.3
|
2 |
pandas==2.2.3
|
3 |
gradio_rangeslider==0.0.7
|
4 |
+
gradio==5.9.0
|
5 |
+
langcodes==3.5.0
|
src/collect_data.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Collect data from multiple sources and create a base dataframe for the LLMCalculator table
|
3 |
+
Latency - https://github.com/clembench/clembench-runs/tree/main/Addenda/Latency
|
4 |
+
Pricing - pricing.json
|
5 |
+
Model info - https://github.com/kushal-10/clembench/blob/feat/registry/backends/model_registry_updated.json
|
6 |
+
"""
|
7 |
+
|
8 |
+
import pandas as pd
|
9 |
+
import json
|
10 |
+
import requests
|
11 |
+
from assets.text_content import REPO, REGISTRY_URL, BENCHMARK_FILE
|
12 |
+
|
13 |
+
response = requests.get(REGISTRY_URL)
|
14 |
+
|
15 |
+
# Check if the JSON file request was successful
|
16 |
+
if response.status_code != 200:
|
17 |
+
print(f"Failed to read JSON file - {REGISTRY_URL}. Status Code: {response.status_code}")
|
18 |
+
|
utils/__pycache__/filter_utils.cpython-313.pyc
ADDED
Binary file (3.87 kB). View file
|
|
utils/__pycache__/text_content.cpython-313.pyc
ADDED
Binary file (423 Bytes). View file
|
|