jadehardouin committed
Commit 42b592e
Parent(s): 0e893b5
Update models.py

models.py CHANGED
@@ -145,7 +145,7 @@ class DIYLlama2Model(BaseTCOModel):
             r = maxed_out / 100
             return input_tokens_cost_per_token * 0.65 / r, output_tokens_cost_per_token * 0.65/ r
 
-        self.source = gr.Markdown("""<span style="font-size: 16px; font-weight: 600; color: #212529;">Source</span>""")
+        self.source = gr.Markdown("""<span style="font-size: 16px; font-weight: 600; color: #212529;">Source</span>""", visible=False)
         self.info = gr.Markdown("The cost per input and output tokens values below are from [these benchmark results](https://www.cursor.so/blog/llama-inference#user-content-fn-llama-paper) that were obtained using the following initial configurations.",
                                 interactive=False,
                                 visible=False)
@@ -156,7 +156,7 @@ class DIYLlama2Model(BaseTCOModel):
         self.vm_cost_per_hour = gr.Number(4.42, label="Instance cost ($) per hour",
                                           interactive=False, visible=False)
         self.info_vm = gr.Markdown("This price above is from [CoreWeave's pricing web page](https://www.coreweave.com/gpu-cloud-pricing)", interactive=False, visible=False)
-        self.maxed_out = gr.Slider(minimum=1, maximum=100, value=65, step=1, label="Maxed out", info="Estimated average percentage of total GPU memory that is used. The instantaneous value can go from very high when many users are using the service to very low when no one does.")
+        self.maxed_out = gr.Slider(minimum=1, maximum=100, value=65, step=1, label="Maxed out", info="Estimated average percentage of total GPU memory that is used. The instantaneous value can go from very high when many users are using the service to very low when no one does.", visible=False)
         self.info_maxed_out = gr.Markdown(r"""This percentage influences the input and output cost/token values, and more precisely the number of token/s. Here is the formula used:<br>
         $CT = \frac{VM_C}{TS}$ where $TS = TS_{max} * \frac{MO}{100}$ <br>
         with: <br>
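
The two touched lines only add visible=False, so both components now start hidden, like the neighbouring info, vm_cost_per_hour and info_vm fields, and are presumably revealed later by an event handler. A minimal sketch of that pattern in Gradio follows; the Radio choices and the toggle helper are illustrative, not taken from the repository:

import gradio as gr

with gr.Blocks() as demo:
    # Hypothetical selector; the real app decides elsewhere when to show the DIY fields.
    deployment = gr.Radio(["SaaS", "DIY Llama 2"], label="Deployment option")
    # Created hidden, exactly like the components in the diff above.
    maxed_out = gr.Slider(minimum=1, maximum=100, value=65, step=1,
                          label="Maxed out", visible=False)

    def toggle(option):
        # gr.update changes component properties (here: visibility) from a handler.
        return gr.update(visible=(option == "DIY Llama 2"))

    deployment.change(toggle, inputs=deployment, outputs=maxed_out)

demo.launch()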
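For context, the unchanged lines 145-146 of models.py implement the formula quoted in info_maxed_out: the benchmark cost/token figures correspond to the default 65% maxed-out value, and the cost scales with the inverse of the chosen percentage. A standalone sketch of that scaling, with a made-up function name and example figures:

def scale_costs_by_maxed_out(input_cost, output_cost, maxed_out, reference=65):
    # CT = VM_C / TS with TS = TS_max * MO / 100, so the cost per token is
    # proportional to 1 / (MO / 100); the benchmark values are taken at MO = 65.
    r = maxed_out / 100
    return input_cost * (reference / 100) / r, output_cost * (reference / 100) / r

# At the reference value nothing changes; at half the utilisation the cost doubles.
print(scale_costs_by_maxed_out(0.0010, 0.0014, maxed_out=65))    # ~ (0.0010, 0.0014)
print(scale_costs_by_maxed_out(0.0010, 0.0014, maxed_out=32.5))  # ~ (0.0020, 0.0028)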