jadehardouin committed on
Commit
4424c49
·
1 Parent(s): 0ad933c

Update models.py

Browse files
Files changed (1) hide show
  1. models.py +64 -24
models.py CHANGED
@@ -99,7 +99,7 @@ class OpenSourceLlama2Model(BaseTCOModel):
99
  with: <br>
100
  CT = Cost per Token <br>
101
  VM_CH = VM Cost per Hour <br>
102
- TS = Tokens per Second (for an input length of 233 tokens) <br>
103
  MO = Maxed Out <br>
104
  U = Used
105
  """)
@@ -107,15 +107,16 @@ class OpenSourceLlama2Model(BaseTCOModel):
107
 
108
  def render(self):
109
  vm_choices = ["1x Nvidia A100 (Azure NC24ads A100 v4)",
110
- "2x Nvidia A100 (Azure NC48ads A100 v4)"]
 
111
 
112
  def on_model_change(model):
113
  if model == "Llama 2 7B":
114
- return gr.Dropdown.update(choices=vm_choices)
115
  else:
116
- not_supported_vm = ["1x Nvidia A100 (Azure NC24ads A100 v4)"]
117
  choices = [x for x in vm_choices if x not in not_supported_vm]
118
- return gr.Dropdown.update(choices=choices)
119
 
120
  def on_vm_change(model, vm):
121
  # TO DO: load info from CSV
@@ -123,6 +124,10 @@ class OpenSourceLlama2Model(BaseTCOModel):
123
  return [gr.Number.update(value=3.6730), gr.Number.update(value=694.38)]
124
  elif model == "Llama 2 7B" and vm == "2x Nvidia A100 (Azure NC48ads A100 v4)":
125
  return [gr.Number.update(value=7.346), gr.Number.update(value=1388.76)]
 
 
 
 
126
 
127
  self.model = gr.Dropdown(["Llama 2 7B", "Llama 2 70B"], value="Llama 2 7B", label="OpenSource models", visible=False)
128
  self.vm = gr.Dropdown(vm_choices,
@@ -132,22 +137,67 @@ class OpenSourceLlama2Model(BaseTCOModel):
132
  info="Your options for this choice depend on the model you previously chose"
133
  )
134
  self.vm_cost_per_hour = gr.Number(3.6730, label="VM instance cost per hour",
135
- interactive=True, visible=False)
136
  self.tokens_per_second = gr.Number(694.38, visible=False,
137
  label="Number of tokens per second for this specific model and VM instance",
138
  interactive=False
139
  )
140
- self.input_length = gr.Number(350, label="Average number of input tokens",
141
- interactive=True, visible=False)
 
 
142
 
143
- self.model.change(on_model_change, inputs=self.model, outputs=self.vm)
144
  self.vm.change(on_vm_change, inputs=[self.model, self.vm], outputs=[self.vm_cost_per_hour, self.tokens_per_second])
145
  self.maxed_out = gr.Slider(minimum=0.01, value=50., step=0.01, label="% maxed out",
146
- info="How much the GPU is fully used.",
147
  interactive=True,
148
  visible=False)
149
  self.used = gr.Slider(minimum=0.01, value=50., step=0.01, label="% used",
150
- info="Percentage of time the GPU is used.",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  interactive=True,
152
  visible=False)
153
 
@@ -174,8 +224,8 @@ class CohereModel(BaseTCOModel):
174
  else:
175
  return gr.Dropdown.update(choices=["Default", "Custom"])
176
 
177
- self.use_case = gr.Dropdown(["Embed", "Generate", "Classify", "Summarize"], value="Generate",
178
- label="Use case",
179
  interactive=True, visible=False)
180
  self.model = gr.Dropdown(["Default", "Custom"], value="Default",
181
  label="Model",
@@ -189,21 +239,11 @@ class CohereModel(BaseTCOModel):
189
  use_case = use_case[0]
190
  model = model[0]
191
 
192
- if use_case == "Embed":
193
- if model == "Default":
194
- cost_per_1M_input_tokens = 0.4
195
- else:
196
- cost_per_1M_input_tokens = 0.8
197
- elif use_case == "Generate":
198
  if model == "Default":
199
  cost_per_1M_input_tokens = 15
200
  else:
201
  cost_per_1M_input_tokens = 30
202
- elif use_case == "Classify":
203
- if model == "Default":
204
- cost_per_1M_input_tokens = 200
205
- else:
206
- cost_per_1M_input_tokens = 200
207
  else:
208
  cost_per_1M_input_tokens = 15
209
 
 
99
  with: <br>
100
  CT = Cost per Token <br>
101
  VM_CH = VM Cost per Hour <br>
102
+ TS = Tokens per Second <br>
103
  MO = Maxed Out <br>
104
  U = Used
105
  """)
 
107
 
108
  def render(self):
109
  vm_choices = ["1x Nvidia A100 (Azure NC24ads A100 v4)",
110
+ "2x Nvidia A100 (Azure NC48ads A100 v4)",
111
+ "4x Nvidia A100 (Azure NC48ads A100 v4)"]
112
 
113
  def on_model_change(model):
114
  if model == "Llama 2 7B":
115
+ return [gr.Dropdown.update(choices=vm_choices), gr.Markdown.update(visible=True), gr.Markdown.update(visible=False)]
116
  else:
117
+ not_supported_vm = ["1x Nvidia A100 (Azure NC24ads A100 v4)", "2x Nvidia A100 (Azure NC48ads A100 v4)"]
118
  choices = [x for x in vm_choices if x not in not_supported_vm]
119
+ return [gr.Dropdown.update(choices=choices), gr.Markdown.update(visible=False), gr.Markdown.update(visible=True)]
120
 
121
  def on_vm_change(model, vm):
122
  # TO DO: load info from CSV
 
124
  return [gr.Number.update(value=3.6730), gr.Number.update(value=694.38)]
125
  elif model == "Llama 2 7B" and vm == "2x Nvidia A100 (Azure NC48ads A100 v4)":
126
  return [gr.Number.update(value=7.346), gr.Number.update(value=1388.76)]
127
+ elif model == "Llama 2 7B" and vm == "4x Nvidia A100 (Azure NC48ads A100 v4)":
128
+ return [gr.Number.update(value=14.692), gr.Number.update(value=2777.52)]
129
+ elif model == "Llama 2 70B" and vm == "4x Nvidia A100 (Azure NC48ads A100 v4)":
130
+ return [gr.Number.update(value=14.692), gr.Number.update(value=18.6)]
131
 
132
  self.model = gr.Dropdown(["Llama 2 7B", "Llama 2 70B"], value="Llama 2 7B", label="OpenSource models", visible=False)
133
  self.vm = gr.Dropdown(vm_choices,
 
137
  info="Your options for this choice depend on the model you previously chose"
138
  )
139
  self.vm_cost_per_hour = gr.Number(3.6730, label="VM instance cost per hour",
140
+ interactive=False, visible=False)
141
  self.tokens_per_second = gr.Number(694.38, visible=False,
142
  label="Number of tokens per second for this specific model and VM instance",
143
  interactive=False
144
  )
145
+ self.input_length = gr.Number(233, label="Average number of input tokens", info="This is the number of input tokens used when the model was benchmarked to get the number of tokens/second it processes",
146
+ interactive=False, visible=False)
147
+ self.info_7B = gr.Markdown("To see the script used to benchmark the Llama2-7B model, [click here](https://example.com/script)", interactive=False, visible=False)
148
+ self.info_70B = gr.Markdown("To see the benchmark results used for the Llama2-70B model, [click here](https://www.cursor.so/blog/llama-inference#user-content-fn-llama-paper)", interactive=False, visible=False)
149
 
150
+ self.model.change(on_model_change, inputs=self.model, outputs=[self.vm, self.info_7B, self.info_70B])
151
  self.vm.change(on_vm_change, inputs=[self.model, self.vm], outputs=[self.vm_cost_per_hour, self.tokens_per_second])
152
  self.maxed_out = gr.Slider(minimum=0.01, value=50., step=0.01, label="% maxed out",
153
+ info="How much the GPU is fully used",
154
  interactive=True,
155
  visible=False)
156
  self.used = gr.Slider(minimum=0.01, value=50., step=0.01, label="% used",
157
+ info="Percentage of time the GPU is used",
158
+ interactive=True,
159
+ visible=False)
160
+
161
+ def compute_cost_per_token(self, vm_cost_per_hour, tokens_per_second, maxed_out, used):
162
+ cost_per_token = vm_cost_per_hour / (tokens_per_second * 3600 * maxed_out * used)
163
+ return cost_per_token
164
+
165
+ class OpenSourceDIY(BaseTCOModel):
166
+
167
+ def __init__(self):
168
+ self.set_name("(Open source) DIY")
169
+ self.set_formula(r"""$CT = \frac{VM\_CH}{TS \times 3600 \times MO \times U}$<br>
170
+ with: <br>
171
+ CT = Cost per Token <br>
172
+ VM_CH = VM Cost per Hour <br>
173
+ TS = Tokens per Second <br>
174
+ MO = Maxed Out <br>
175
+ U = Used
176
+ """)
177
+ super().__init__()
178
+
179
+ def render(self):
180
+ self.info = gr.Markdown("Compute the cost/token based on our formula below, using your own parameters", visible=False)
181
+ self.display_formula = gr.Markdown(r"""$CT = \frac{VM\_CH}{TS \times 3600 \times MO \times U}$<br>
182
+ with: <br>
183
+ CT = Cost per Token <br>
184
+ VM_CH = VM Cost per Hour <br>
185
+ TS = Tokens per Second <br>
186
+ MO = Maxed Out <br>
187
+ U = Used
188
+ """, visible=False)
189
+ self.vm_cost_per_hour = gr.Number(3.5, label="VM instance cost per hour",
190
+ interactive=True, visible=False)
191
+ self.tokens_per_second = gr.Number(700, visible=False,
192
+ label="Number of tokens per second for this specific model and VM instance",
193
+ interactive=True
194
+ )
195
+ self.maxed_out = gr.Slider(minimum=0.01, value=50., step=0.01, label="% maxed out",
196
+ info="How much the GPU is fully used",
197
+ interactive=True,
198
+ visible=False)
199
+ self.used = gr.Slider(minimum=0.01, value=50., step=0.01, label="% used",
200
+ info="Percentage of time the GPU is used",
201
  interactive=True,
202
  visible=False)
203
 
 
224
  else:
225
  return gr.Dropdown.update(choices=["Default", "Custom"])
226
 
227
+ self.use_case = gr.Dropdown(["Generate", "Summarize"], value="Generate",
228
+ label="API",
229
  interactive=True, visible=False)
230
  self.model = gr.Dropdown(["Default", "Custom"], value="Default",
231
  label="Model",
 
239
  use_case = use_case[0]
240
  model = model[0]
241
 
242
+ if use_case == "Generate":
 
 
 
 
 
243
  if model == "Default":
244
  cost_per_1M_input_tokens = 15
245
  else:
246
  cost_per_1M_input_tokens = 30
 
 
 
 
 
247
  else:
248
  cost_per_1M_input_tokens = 15
249