jadehardouin committed on
Commit
42b592e
1 Parent(s): 0e893b5

Update models.py

Files changed (1)
  1. models.py +2 -2
models.py CHANGED
@@ -145,7 +145,7 @@ class DIYLlama2Model(BaseTCOModel):
         r = maxed_out / 100
         return input_tokens_cost_per_token * 0.65 / r, output_tokens_cost_per_token * 0.65/ r
 
-        self.source = gr.Markdown("""<span style="font-size: 16px; font-weight: 600; color: #212529;">Source</span>""")
+        self.source = gr.Markdown("""<span style="font-size: 16px; font-weight: 600; color: #212529;">Source</span>""", visible=False)
         self.info = gr.Markdown("The cost per input and output tokens values below are from [these benchmark results](https://www.cursor.so/blog/llama-inference#user-content-fn-llama-paper) that were obtained using the following initial configurations.",
                                 interactive=False,
                                 visible=False)
@@ -156,7 +156,7 @@ class DIYLlama2Model(BaseTCOModel):
         self.vm_cost_per_hour = gr.Number(4.42, label="Instance cost ($) per hour",
                                           interactive=False, visible=False)
         self.info_vm = gr.Markdown("This price above is from [CoreWeave's pricing web page](https://www.coreweave.com/gpu-cloud-pricing)", interactive=False, visible=False)
-        self.maxed_out = gr.Slider(minimum=1, maximum=100, value=65, step=1, label="Maxed out", info="Estimated average percentage of total GPU memory that is used. The instantaneous value can go from very high when many users are using the service to very low when no one does.")
+        self.maxed_out = gr.Slider(minimum=1, maximum=100, value=65, step=1, label="Maxed out", info="Estimated average percentage of total GPU memory that is used. The instantaneous value can go from very high when many users are using the service to very low when no one does.", visible=False)
         self.info_maxed_out = gr.Markdown(r"""This percentage influences the input and output cost/token values, and more precisely the number of token/s. Here is the formula used:<br>
         $CT = \frac{VM_C}{TS}$ where $TS = TS_{max} * \frac{MO}{100}$ <br>
         with: <br>
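
For reference, a minimal sketch of how the "Maxed out" slider could feed the cost/token formula quoted in the hunk above ($CT = VM_C / TS$ with $TS = TS_{max} \cdot MO / 100$). Only the $4.42/h instance cost, the 65% default, and the `0.65 / r` rescaling appear in the diff; the throughput figure `max_tokens_per_second` and the helper names below are hypothetical placeholders.

```python
# Hypothetical sketch (not part of the commit): how the "Maxed out" percentage (MO)
# relates to the cost-per-token formula shown in info_maxed_out.
# CT = VM_C / TS, with TS = TS_max * (MO / 100).

def cost_per_token(vm_cost_per_hour: float, max_tokens_per_second: float, maxed_out: float) -> float:
    """Dollar cost per token for an instance running at a given average utilisation."""
    tokens_per_second = max_tokens_per_second * (maxed_out / 100)  # TS
    tokens_per_hour = tokens_per_second * 3600
    return vm_cost_per_hour / tokens_per_hour                      # CT

def rescale_benchmark_costs(input_cost: float, output_cost: float, maxed_out: float) -> tuple[float, float]:
    """Mirror of the return statement in the hunk: the benchmark cost/token values
    were obtained at 65% maxed out, so they are rescaled by 0.65 / r for the chosen ratio."""
    r = maxed_out / 100
    return input_cost * 0.65 / r, output_cost * 0.65 / r

# Example with the defaults visible in the diff: $4.42/h instance, slider at 65%.
# max_tokens_per_second=1000 is a made-up placeholder, not a benchmark result.
print(cost_per_token(vm_cost_per_hour=4.42, max_tokens_per_second=1000, maxed_out=65))
```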