derek-thomas HF staff committed on
Commit
58b581f
1 Parent(s): 54cda30

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -124
app.py CHANGED
@@ -65,74 +65,76 @@ with gr.Blocks() as demo:
65
  with gr.Tabs():
66
  # Memory Calculation Tab
67
  with gr.TabItem("Memory Calculation"):
68
- with gr.Group():
69
- hf_model_name_or_path = gr.Textbox(
70
- label="HuggingFace Model Name or Path",
71
- info="Name of the HuggingFace Hub repository or the local file path for it"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  )
73
- sequence_length = gr.Number(
74
- label="Sequence Length",
75
- value=2048,
76
- info="Sequence length used for training"
77
  )
78
- vocab_size = gr.Number(
79
- label="Vocab Size",
80
- value=51200,
81
- info="How many tokens are in the embedding layer"
82
  )
83
- hidden_size = gr.Number(
84
- label="Hidden Size",
85
- value=6144,
86
- info="Dimension of the model's hidden size"
87
  )
88
- num_attention_heads = gr.Number(
89
- label="Number of Attention Heads",
90
- value=64,
91
- info="Number of attention heads used in the model"
 
 
 
 
 
 
 
 
 
 
92
  )
93
- num_layers = gr.Number(
94
- label="Number of Layers",
95
- value=44,
96
- info="Number of transformer layers used in the model"
97
- )
98
- num_gpus = gr.Number(
99
- label="Number of GPUs",
100
- value=1,
101
- info="Number of GPUs used for training"
102
- )
103
- tensor_parallel_size = gr.Number(
104
- label="Tensor Parallel Size",
105
- value=1,
106
- info="Tensor parallel degree (1 if not used)"
107
- )
108
- pipeline_parallel_size = gr.Number(
109
- label="Pipeline Parallel Size",
110
- value=1,
111
- info="Pipeline parallel degree (1 if not used)"
112
- )
113
- batch_size_per_gpu = gr.Number(
114
- label="Batch Size per GPU",
115
- value=8,
116
- info="Batch size per GPU"
117
- )
118
- ffn_expansion_factor = gr.Number(
119
- label="FFN Expansion Factor",
120
- value=4,
121
- info="How much the MLP hidden size expands"
122
- )
123
- is_mixed_precision = gr.Checkbox(
124
- label="Mixed Precision",
125
- value=True,
126
- info="Whether mixed precision is enabled"
127
- )
128
- misc_mem_gib = gr.Number(
129
- label="Miscellaneous Memory Overhead (GiB)",
130
- value=5,
131
- info="Miscellaneous memory overhead per GPU by DL frameworks, communication libraries, etc."
132
- )
133
 
134
- memory_result = gr.Textbox(label="Memory Calculation Result", interactive=False)
135
  calc_memory_button = gr.Button("Calculate Memory")
 
136
  calc_memory_button.click(
137
  calc_mem,
138
  inputs=[
@@ -149,76 +151,78 @@ with gr.Blocks() as demo:
149
 
150
  # Parameter Calculation Tab
151
  with gr.TabItem("Parameter Calculation"):
152
- with gr.Group():
153
- hf_model_name_or_path = gr.Textbox(
154
- label="HuggingFace Model Name or Path",
155
- info="Name of the HuggingFace Hub repository or the local file path for it"
156
- )
157
- vocab_size = gr.Number(
158
- label="Vocab Size",
159
- value=51200,
160
- info="How many tokens are in the embedding layer"
161
- )
162
- hidden_size = gr.Number(
163
- label="Hidden Size",
164
- value=6144,
165
- info="Dimension of the model's hidden size"
166
- )
167
- sequence_length = gr.Number(
168
- label="Sequence Length",
169
- value=2048,
170
- info="Sequence length used for training"
171
- )
172
- num_layers = gr.Number(
173
- label="Number of Layers",
174
- value=44,
175
- info="Number of transformer layers used in the model"
176
- )
177
- tied_embeddings = gr.Checkbox(
178
- label="Tied Embeddings",
179
- value=False,
180
- info="Whether embeddings are tied (shared between input and output)"
181
- )
182
- ffn_expansion_factor = gr.Number(
183
- label="FFN Expansion Factor",
184
- value=4,
185
- info="How much the MLP hidden size expands"
186
- )
187
- num_mlp_linears = gr.Number(
188
- label="Number of Linear Layers per MLP Block",
189
- value=2,
190
- info="How many linear layers per MLP block"
191
- )
192
- kv_size_ratio = gr.Number(
193
- label="KV Size Ratio",
194
- value=1.0,
195
- info="Ratio of total query heads to key/value heads. 1.0 for MHA, 1/num_attention_heads for MQA"
196
- )
197
-
198
- with gr.Accordion("MoE Parameters", open=False):
199
- moe = gr.Checkbox(
200
- label="MoE",
201
  value=False,
202
- info="Whether the model is MoE"
203
  )
204
- num_experts = gr.Number(
205
- label="Number of Experts",
206
- value=8,
207
- info="Number of experts for MoE"
208
  )
209
- expert_interval = gr.Number(
210
- label="Expert Interval",
211
- value=1,
212
- info="Expert interval for MoE"
213
  )
214
- topk = gr.Number(
215
- label="Top k Routing",
216
- value=1,
217
- info="Top k routing for MoE"
218
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
 
220
- param_result = gr.Textbox(label="Parameter Calculation Result", interactive=False)
221
  calc_param_button = gr.Button("Calculate Parameters")
 
222
  calc_param_button.click(calc_params,
223
  inputs=[vocab_size, tied_embeddings, hidden_size, sequence_length, num_layers, moe, num_experts, expert_interval, topk, ffn_expansion_factor, num_mlp_linears, kv_size_ratio],
224
  outputs=param_result)
 
65
  with gr.Tabs():
66
  # Memory Calculation Tab
67
  with gr.TabItem("Memory Calculation"):
68
+ with gr.Column("Generatable"):
69
+ with gr.Group():
70
+ hf_model_name_or_path = gr.Textbox(
71
+ label="HuggingFace Model Name or Path",
72
+ info="Name of the HuggingFace Hub repository or the local file path for it"
73
+ )
74
+ sequence_length = gr.Number(
75
+ label="Sequence Length",
76
+ value=2048,
77
+ info="Sequence length used for training"
78
+ )
79
+ vocab_size = gr.Number(
80
+ label="Vocab Size",
81
+ value=51200,
82
+ info="How many tokens are in the embedding layer"
83
+ )
84
+ hidden_size = gr.Number(
85
+ label="Hidden Size",
86
+ value=6144,
87
+ info="Dimension of the model's hidden size"
88
+ )
89
+ num_attention_heads = gr.Number(
90
+ label="Number of Attention Heads",
91
+ value=64,
92
+ info="Number of attention heads used in the model"
93
+ )
94
+ num_layers = gr.Number(
95
+ label="Number of Layers",
96
+ value=44,
97
+ info="Number of transformer layers used in the model"
98
+ )
99
+ with gr.Column("User Defined"):
100
+ num_gpus = gr.Number(
101
+ label="Number of GPUs",
102
+ value=1,
103
+ info="Number of GPUs used for training"
104
  )
105
+ tensor_parallel_size = gr.Number(
106
+ label="Tensor Parallel Size",
107
+ value=1,
108
+ info="Tensor parallel degree (1 if not used)"
109
  )
110
+ pipeline_parallel_size = gr.Number(
111
+ label="Pipeline Parallel Size",
112
+ value=1,
113
+ info="Pipeline parallel degree (1 if not used)"
114
  )
115
+ batch_size_per_gpu = gr.Number(
116
+ label="Batch Size per GPU",
117
+ value=8,
118
+ info="Batch size per GPU"
119
  )
120
+ ffn_expansion_factor = gr.Number(
121
+ label="FFN Expansion Factor",
122
+ value=4,
123
+ info="How much the MLP hidden size expands"
124
+ )
125
+ is_mixed_precision = gr.Checkbox(
126
+ label="Mixed Precision",
127
+ value=True,
128
+ info="Whether mixed precision is enabled"
129
+ )
130
+ misc_mem_gib = gr.Number(
131
+ label="Miscellaneous Memory Overhead (GiB)",
132
+ value=5,
133
+ info="Miscellaneous memory overhead per GPU by DL frameworks, communication libraries, etc."
134
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
 
136
  calc_memory_button = gr.Button("Calculate Memory")
137
+ memory_result = gr.Textbox(label="Memory Calculation Result", interactive=False)
138
  calc_memory_button.click(
139
  calc_mem,
140
  inputs=[
 
151
 
152
  # Parameter Calculation Tab
153
  with gr.TabItem("Parameter Calculation"):
154
+ with gr.Column("Generatable"):
155
+ with gr.Group():
156
+ hf_model_name_or_path = gr.Textbox(
157
+ label="HuggingFace Model Name or Path",
158
+ info="Name of the HuggingFace Hub repository or the local file path for it"
159
+ )
160
+ vocab_size = gr.Number(
161
+ label="Vocab Size",
162
+ value=51200,
163
+ info="How many tokens are in the embedding layer"
164
+ )
165
+ hidden_size = gr.Number(
166
+ label="Hidden Size",
167
+ value=6144,
168
+ info="Dimension of the model's hidden size"
169
+ )
170
+ sequence_length = gr.Number(
171
+ label="Sequence Length",
172
+ value=2048,
173
+ info="Sequence length used for training"
174
+ )
175
+ num_layers = gr.Number(
176
+ label="Number of Layers",
177
+ value=44,
178
+ info="Number of transformer layers used in the model"
179
+ )
180
+ with gr.Column("User Defined"):
181
+ tied_embeddings = gr.Checkbox(
182
+ label="Tied Embeddings",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  value=False,
184
+ info="Whether embeddings are tied (shared between input and output)"
185
  )
186
+ ffn_expansion_factor = gr.Number(
187
+ label="FFN Expansion Factor",
188
+ value=4,
189
+ info="How much the MLP hidden size expands"
190
  )
191
+ num_mlp_linears = gr.Number(
192
+ label="Number of Linear Layers per MLP Block",
193
+ value=2,
194
+ info="How many linear layers per MLP block"
195
  )
196
+ kv_size_ratio = gr.Number(
197
+ label="KV Size Ratio",
198
+ value=1.0,
199
+ info="Ratio of total query heads to key/value heads. 1.0 for MHA, 1/num_attention_heads for MQA"
200
  )
201
+
202
+ with gr.Accordion("MoE Parameters", open=False):
203
+ moe = gr.Checkbox(
204
+ label="MoE",
205
+ value=False,
206
+ info="Whether the model is MoE"
207
+ )
208
+ num_experts = gr.Number(
209
+ label="Number of Experts",
210
+ value=8,
211
+ info="Number of experts for MoE"
212
+ )
213
+ expert_interval = gr.Number(
214
+ label="Expert Interval",
215
+ value=1,
216
+ info="Expert interval for MoE"
217
+ )
218
+ topk = gr.Number(
219
+ label="Top k Routing",
220
+ value=1,
221
+ info="Top k routing for MoE"
222
+ )
223
 
 
224
  calc_param_button = gr.Button("Calculate Parameters")
225
+ param_result = gr.Textbox(label="Parameter Calculation Result", interactive=False)
226
  calc_param_button.click(calc_params,
227
  inputs=[vocab_size, tied_embeddings, hidden_size, sequence_length, num_layers, moe, num_experts, expert_interval, topk, ffn_expansion_factor, num_mlp_linears, kv_size_ratio],
228
  outputs=param_result)