dhuynh95 committed on
Commit
829e19d
1 Parent(s): c194fcd

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +2 -2
  2. models.py +30 -13
app.py CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
2
  import models
3
 
4
  with gr.Blocks() as demo:
5
- Models: list[models.BaseTCOModel] = [models.OpenAIModel, models.OpenSourceModel]
6
  model_names = [Model().get_name() for Model in Models]
7
  with gr.Row():
8
  with gr.Column():
@@ -20,6 +20,6 @@ with gr.Blocks() as demo:
20
 
21
  compute_tco_btn = gr.Button("Compute TCO")
22
  tco_output = gr.Text("Output: ")
23
- compute_tco_btn.click(page1.compute_cost_per_token, inputs=page1.get_all_components() + [dropdown], outputs=tco_output)
24
 
25
  demo.launch(debug=True)
 
2
  import models
3
 
4
  with gr.Blocks() as demo:
5
+ Models: list[models.BaseTCOModel] = [models.OpenAIModel, models.OpenSourceLlama2Model]
6
  model_names = [Model().get_name() for Model in Models]
7
  with gr.Row():
8
  with gr.Column():
 
20
 
21
  compute_tco_btn = gr.Button("Compute TCO")
22
  tco_output = gr.Text("Output: ")
23
+ compute_tco_btn.click(page1.compute_cost_per_token, inputs=page1.get_all_components_for_cost_computing() + [dropdown], outputs=tco_output)
24
 
25
  demo.launch(debug=True)
models.py CHANGED
@@ -1,7 +1,7 @@
1
  from gradio.components import Component
2
  import gradio as gr
3
- import uuid
4
  from abc import ABC, abstractclassmethod
 
5
 
6
  class BaseTCOModel(ABC):
7
  # TO DO: Find way to specify which component should be used for computing cost
@@ -16,9 +16,16 @@ class BaseTCOModel(ABC):
16
  def get_components(self) -> list[Component]:
17
  return self._components
18
 
 
 
 
19
  def get_name(self):
20
  return self.name
21
 
 
 
 
 
22
  @abstractclassmethod
23
  def compute_cost_per_token(self):
24
  pass
@@ -29,7 +36,6 @@ class BaseTCOModel(ABC):
29
 
30
  def set_name(self, name):
31
  self.name = name
32
- self.id = name + str(uuid.uuid4())
33
 
34
  class OpenAIModel(BaseTCOModel):
35
 
@@ -75,14 +81,15 @@ class OpenAIModel(BaseTCOModel):
75
 
76
  return cost_per_output_token
77
 
78
- class OpenSourceModel(BaseTCOModel):
79
  def __init__(self):
80
- self.set_name("(Open source) Deploy yourself")
81
  super().__init__()
82
 
83
  def render(self):
84
  vm_choices = ["1x Nvidia A100 (Azure NC24ads A100 v4)",
85
  "2x Nvidia A100 (Azure NC48ads A100 v4)"]
 
86
  def on_model_change(model):
87
  if model == "Llama 2 7B":
88
  return gr.Dropdown.update(choices=vm_choices)
@@ -103,7 +110,9 @@ class OpenSourceModel(BaseTCOModel):
103
  visible=False,
104
  label="Instance of VM with GPU"
105
  )
106
- self.tokens_per_second = gr.Number(visible=False,
 
 
107
  label="Number of tokens per second for this specific model and VM instance",
108
  interactive=False
109
  )
@@ -112,14 +121,14 @@ class OpenSourceModel(BaseTCOModel):
112
 
113
  self.model.change(on_model_change, inputs=self.model, outputs=self.vm)
114
  self.vm.change(on_vm_change, inputs=[self.model, self.vm], outputs=self.tokens_per_second)
115
- self.maxed_out = gr.Slider(minimum=1, value=80, label="% maxed out",
116
  info="How much the GPU is fully used.",
117
  interactive=True,
118
  visible=False)
119
 
120
- def compute_cost_per_token(self, tokens_per_second, maxed_out):
121
-
122
- return maxed_out
123
 
124
  class ModelPage:
125
  def __init__(self, Models: BaseTCOModel):
@@ -130,13 +139,20 @@ class ModelPage:
130
 
131
  def render(self):
132
  for model in self.models:
133
- model.render()
 
134
 
135
  def get_all_components(self) -> list[Component]:
136
  output = []
137
  for model in self.models:
138
  output += model.get_components()
139
  return output
 
 
 
 
 
 
140
 
141
  def make_model_visible(self, name:str):
142
  # First decide which indexes
@@ -152,9 +168,10 @@ class ModelPage:
152
  begin=0
153
  current_model = args[-1]
154
  for model in self.models:
155
- model_n_args = len(model.get_components())
156
- model_args = args[begin:begin+model_n_args]
157
  if current_model == model.get_name():
 
 
158
  model_tco = model.compute_cost_per_token(*model_args)
159
  return f"Model {current_model} has TCO {model_tco}"
160
- begin = begin+model_n_args
 
1
  from abc import ABC, abstractclassmethod, abstractmethod
  import inspect

  import gradio as gr
  from gradio.components import Component
5
 
6
  class BaseTCOModel(ABC):
7
  # TO DO: Find way to specify which component should be used for computing cost
 
16
  def get_components(self) -> list[Component]:
17
  return self._components
18
 
19
def get_components_for_cost_computing(self):
    """Return the components used as inputs of ``compute_cost_per_token``.

    The list is read from ``self.components_for_cost_computing``, which
    ``register_components_for_cost_computing`` assigns; if that attribute
    has not been set yet, the attribute lookup raises ``AttributeError``.
    """
    registered = self.components_for_cost_computing
    return registered
21
+
22
  def get_name(self):
23
  return self.name
24
 
25
def register_components_for_cost_computing(self):
    """Record which attributes feed ``compute_cost_per_token``.

    The positional parameter names of ``self.compute_cost_per_token`` are
    looked up as attributes on ``self`` (same name, same order) and the
    resulting objects are stored in ``self.components_for_cost_computing``.

    Raises:
        AttributeError: if a parameter name has no matching attribute on
            ``self`` (e.g. the component was not created yet).
    """
    # inspect.signature on the *bound* callable already omits self/cls, so
    # nothing has to be sliced off by position; this also keeps every
    # parameter when the implementation is a staticmethod.
    positional_kinds = (
        inspect.Parameter.POSITIONAL_ONLY,
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
    )
    parameters = inspect.signature(self.compute_cost_per_token).parameters
    self.components_for_cost_computing = [
        getattr(self, name)
        for name, parameter in parameters.items()
        if parameter.kind in positional_kinds
    ]
28
+
29
  @abstractclassmethod
30
  def compute_cost_per_token(self):
31
  pass
 
36
 
37
  def set_name(self, name):
38
  self.name = name
 
39
 
40
  class OpenAIModel(BaseTCOModel):
41
 
 
81
 
82
  return cost_per_output_token
83
 
84
+ class OpenSourceLlama2Model(BaseTCOModel):
85
  def __init__(self):
      # Assign the display name first, then run the base-class initializer.
      # NOTE(review): BaseTCOModel.__init__ is not visible in this view; the
      # ordering presumably matters to it -- confirm before reordering.
      self.set_name("(Open source) Llama 2")
      super().__init__()
88
 
89
  def render(self):
90
  vm_choices = ["1x Nvidia A100 (Azure NC24ads A100 v4)",
91
  "2x Nvidia A100 (Azure NC48ads A100 v4)"]
92
+
93
  def on_model_change(model):
94
  if model == "Llama 2 7B":
95
  return gr.Dropdown.update(choices=vm_choices)
 
110
  visible=False,
111
  label="Instance of VM with GPU"
112
  )
113
+ self.vm_cost_per_hour = gr.Number(3.5, label="VM instance cost per hour",
114
+ interactive=True, visible=False)
115
+ self.tokens_per_second = gr.Number(900, visible=False,
116
  label="Number of tokens per second for this specific model and VM instance",
117
  interactive=False
118
  )
 
121
 
122
  self.model.change(on_model_change, inputs=self.model, outputs=self.vm)
123
  self.vm.change(on_vm_change, inputs=[self.model, self.vm], outputs=self.tokens_per_second)
124
+ self.maxed_out = gr.Slider(minimum=0.01, value=1., step=0.01, label="% maxed out",
125
  info="How much the GPU is fully used.",
126
  interactive=True,
127
  visible=False)
128
 
129
def compute_cost_per_token(self, vm_cost_per_hour, tokens_per_second, maxed_out):
    """Return the VM cost attributed to one generated token.

    Args:
        vm_cost_per_hour: hourly price of the VM instance.
        tokens_per_second: tokens produced per second at full utilisation.
        maxed_out: utilisation factor applied to the throughput (the
            slider feeding it ranges from 0.01 with a default of 1.0).

    Returns:
        ``vm_cost_per_hour`` divided by the tokens produced in one hour at
        the given utilisation.

    Raises:
        ZeroDivisionError: if the effective hourly throughput is zero.
    """
    seconds_per_hour = 3600
    # Same left-to-right multiplication order as before, so float results
    # are bit-identical.
    effective_tokens_per_hour = tokens_per_second * seconds_per_hour * maxed_out
    return vm_cost_per_hour / effective_tokens_per_hour
132
 
133
  class ModelPage:
134
  def __init__(self, Models: BaseTCOModel):
 
139
 
140
  def render(self):
141
  for model in self.models:
142
+ model.render()
143
+ model.register_components_for_cost_computing()
144
 
145
  def get_all_components(self) -> list[Component]:
146
  output = []
147
  for model in self.models:
148
  output += model.get_components()
149
  return output
150
+
151
def get_all_components_for_cost_computing(self) -> list[Component]:
    """Return the cost-computing components of all models as one flat list.

    Models are visited in ``self.models`` order and each contributes the
    items returned by its ``get_components_for_cost_computing()`` call, so
    one model's components stay contiguous in the result.
    """
    return [
        component
        for tco_model in self.models
        for component in tco_model.get_components_for_cost_computing()
    ]
156
 
157
  def make_model_visible(self, name:str):
158
  # First decide which indexes
 
168
  begin=0
169
  current_model = args[-1]
170
  for model in self.models:
171
+ model_n_args = len(model.get_components_for_cost_computing())
 
172
  if current_model == model.get_name():
173
+ model_args = args[begin:begin+model_n_args]
174
+ print("Model args: ",model_args)
175
  model_tco = model.compute_cost_per_token(*model_args)
176
  return f"Model {current_model} has TCO {model_tco}"
177
+ begin = begin+model_n_args