derek-thomas HF staff committed on
Commit
24632bb
1 Parent(s): 58b581f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -126
app.py CHANGED
@@ -65,73 +65,74 @@ with gr.Blocks() as demo:
65
  with gr.Tabs():
66
  # Memory Calculation Tab
67
  with gr.TabItem("Memory Calculation"):
68
- with gr.Column("Generatable"):
69
- with gr.Group():
70
- hf_model_name_or_path = gr.Textbox(
71
- label="HuggingFace Model Name or Path",
72
- info="Name of the HuggingFace Hub repository or the local file path for it"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  )
74
- sequence_length = gr.Number(
75
- label="Sequence Length",
76
- value=2048,
77
- info="Sequence length used for training"
78
  )
79
- vocab_size = gr.Number(
80
- label="Vocab Size",
81
- value=51200,
82
- info="How many tokens are in the embedding layer"
83
  )
84
- hidden_size = gr.Number(
85
- label="Hidden Size",
86
- value=6144,
87
- info="Dimension of the model's hidden size"
88
  )
89
- num_attention_heads = gr.Number(
90
- label="Number of Attention Heads",
91
- value=64,
92
- info="Number of attention heads used in the model"
93
  )
94
- num_layers = gr.Number(
95
- label="Number of Layers",
96
- value=44,
97
- info="Number of transformer layers used in the model"
98
- )
99
- with gr.Column("User Defined"):
100
- num_gpus = gr.Number(
101
- label="Number of GPUs",
102
- value=1,
103
- info="Number of GPUs used for training"
104
- )
105
- tensor_parallel_size = gr.Number(
106
- label="Tensor Parallel Size",
107
- value=1,
108
- info="Tensor parallel degree (1 if not used)"
109
- )
110
- pipeline_parallel_size = gr.Number(
111
- label="Pipeline Parallel Size",
112
- value=1,
113
- info="Pipeline parallel degree (1 if not used)"
114
- )
115
- batch_size_per_gpu = gr.Number(
116
- label="Batch Size per GPU",
117
- value=8,
118
- info="Batch size per GPU"
119
- )
120
- ffn_expansion_factor = gr.Number(
121
- label="FFN Expansion Factor",
122
- value=4,
123
- info="How much the MLP hidden size expands"
124
- )
125
- is_mixed_precision = gr.Checkbox(
126
- label="Mixed Precision",
127
- value=True,
128
- info="Whether mixed precision is enabled"
129
- )
130
- misc_mem_gib = gr.Number(
131
- label="Miscellaneous Memory Overhead (GiB)",
132
- value=5,
133
- info="Miscellaneous memory overhead per GPU by DL frameworks, communication libraries, etc."
134
- )
135
 
136
  calc_memory_button = gr.Button("Calculate Memory")
137
  memory_result = gr.Textbox(label="Memory Calculation Result", interactive=False)
@@ -151,75 +152,76 @@ with gr.Blocks() as demo:
151
 
152
  # Parameter Calculation Tab
153
  with gr.TabItem("Parameter Calculation"):
154
- with gr.Column("Generatable"):
155
- with gr.Group():
156
- hf_model_name_or_path = gr.Textbox(
157
- label="HuggingFace Model Name or Path",
158
- info="Name of the HuggingFace Hub repository or the local file path for it"
159
- )
160
- vocab_size = gr.Number(
161
- label="Vocab Size",
162
- value=51200,
163
- info="How many tokens are in the embedding layer"
164
- )
165
- hidden_size = gr.Number(
166
- label="Hidden Size",
167
- value=6144,
168
- info="Dimension of the model's hidden size"
169
- )
170
- sequence_length = gr.Number(
171
- label="Sequence Length",
172
- value=2048,
173
- info="Sequence length used for training"
174
- )
175
- num_layers = gr.Number(
176
- label="Number of Layers",
177
- value=44,
178
- info="Number of transformer layers used in the model"
179
- )
180
- with gr.Column("User Defined"):
181
- tied_embeddings = gr.Checkbox(
182
- label="Tied Embeddings",
183
- value=False,
184
- info="Whether embeddings are tied (shared between input and output)"
185
- )
186
- ffn_expansion_factor = gr.Number(
187
- label="FFN Expansion Factor",
188
- value=4,
189
- info="How much the MLP hidden size expands"
190
- )
191
- num_mlp_linears = gr.Number(
192
- label="Number of Linear Layers per MLP Block",
193
- value=2,
194
- info="How many linear layers per MLP block"
195
- )
196
- kv_size_ratio = gr.Number(
197
- label="KV Size Ratio",
198
- value=1.0,
199
- info="Ratio of total query heads to key/value heads. 1.0 for MHA, 1/num_attention_heads for MQA"
200
- )
201
-
202
- with gr.Accordion("MoE Parameters", open=False):
203
- moe = gr.Checkbox(
204
- label="MoE",
205
  value=False,
206
- info="Whether the model is MoE"
207
  )
208
- num_experts = gr.Number(
209
- label="Number of Experts",
210
- value=8,
211
- info="Number of experts for MoE"
212
  )
213
- expert_interval = gr.Number(
214
- label="Expert Interval",
215
- value=1,
216
- info="Expert interval for MoE"
217
  )
218
- topk = gr.Number(
219
- label="Top k Routing",
220
- value=1,
221
- info="Top k routing for MoE"
222
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
 
224
  calc_param_button = gr.Button("Calculate Parameters")
225
  param_result = gr.Textbox(label="Parameter Calculation Result", interactive=False)
 
65
  with gr.Tabs():
66
  # Memory Calculation Tab
67
  with gr.TabItem("Memory Calculation"):
68
+ with gr.Row():
69
+ with gr.Column("Generatable"):
70
+ with gr.Group():
71
+ hf_model_name_or_path = gr.Textbox(
72
+ label="HuggingFace Model Name or Path",
73
+ info="Name of the HuggingFace Hub repository or the local file path for it"
74
+ )
75
+ sequence_length = gr.Number(
76
+ label="Sequence Length",
77
+ value=2048,
78
+ info="Sequence length used for training"
79
+ )
80
+ vocab_size = gr.Number(
81
+ label="Vocab Size",
82
+ value=51200,
83
+ info="How many tokens are in the embedding layer"
84
+ )
85
+ hidden_size = gr.Number(
86
+ label="Hidden Size",
87
+ value=6144,
88
+ info="Dimension of the model's hidden size"
89
+ )
90
+ num_attention_heads = gr.Number(
91
+ label="Number of Attention Heads",
92
+ value=64,
93
+ info="Number of attention heads used in the model"
94
+ )
95
+ num_layers = gr.Number(
96
+ label="Number of Layers",
97
+ value=44,
98
+ info="Number of transformer layers used in the model"
99
+ )
100
+ with gr.Column("User Defined"):
101
+ num_gpus = gr.Number(
102
+ label="Number of GPUs",
103
+ value=1,
104
+ info="Number of GPUs used for training"
105
+ )
106
+ tensor_parallel_size = gr.Number(
107
+ label="Tensor Parallel Size",
108
+ value=1,
109
+ info="Tensor parallel degree (1 if not used)"
110
+ )
111
+ pipeline_parallel_size = gr.Number(
112
+ label="Pipeline Parallel Size",
113
+ value=1,
114
+ info="Pipeline parallel degree (1 if not used)"
115
  )
116
+ batch_size_per_gpu = gr.Number(
117
+ label="Batch Size per GPU",
118
+ value=8,
119
+ info="Batch size per GPU"
120
  )
121
+ ffn_expansion_factor = gr.Number(
122
+ label="FFN Expansion Factor",
123
+ value=4,
124
+ info="How much the MLP hidden size expands"
125
  )
126
+ is_mixed_precision = gr.Checkbox(
127
+ label="Mixed Precision",
128
+ value=True,
129
+ info="Whether mixed precision is enabled"
130
  )
131
+ misc_mem_gib = gr.Number(
132
+ label="Miscellaneous Memory Overhead (GiB)",
133
+ value=5,
134
+ info="Miscellaneous memory overhead per GPU by DL frameworks, communication libraries, etc."
135
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
137
  calc_memory_button = gr.Button("Calculate Memory")
138
  memory_result = gr.Textbox(label="Memory Calculation Result", interactive=False)
 
152
 
153
  # Parameter Calculation Tab
154
  with gr.TabItem("Parameter Calculation"):
155
+ with gr.Row():
156
+ with gr.Column("Generatable"):
157
+ with gr.Group():
158
+ hf_model_name_or_path = gr.Textbox(
159
+ label="HuggingFace Model Name or Path",
160
+ info="Name of the HuggingFace Hub repository or the local file path for it"
161
+ )
162
+ vocab_size = gr.Number(
163
+ label="Vocab Size",
164
+ value=51200,
165
+ info="How many tokens are in the embedding layer"
166
+ )
167
+ hidden_size = gr.Number(
168
+ label="Hidden Size",
169
+ value=6144,
170
+ info="Dimension of the model's hidden size"
171
+ )
172
+ sequence_length = gr.Number(
173
+ label="Sequence Length",
174
+ value=2048,
175
+ info="Sequence length used for training"
176
+ )
177
+ num_layers = gr.Number(
178
+ label="Number of Layers",
179
+ value=44,
180
+ info="Number of transformer layers used in the model"
181
+ )
182
+ with gr.Column("User Defined"):
183
+ tied_embeddings = gr.Checkbox(
184
+ label="Tied Embeddings",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  value=False,
186
+ info="Whether embeddings are tied (shared between input and output)"
187
  )
188
+ ffn_expansion_factor = gr.Number(
189
+ label="FFN Expansion Factor",
190
+ value=4,
191
+ info="How much the MLP hidden size expands"
192
  )
193
+ num_mlp_linears = gr.Number(
194
+ label="Number of Linear Layers per MLP Block",
195
+ value=2,
196
+ info="How many linear layers per MLP block"
197
  )
198
+ kv_size_ratio = gr.Number(
199
+ label="KV Size Ratio",
200
+ value=1.0,
201
+ info="Ratio of total query heads to key/value heads. 1.0 for MHA, 1/num_attention_heads for MQA"
202
  )
203
+
204
+ with gr.Accordion("MoE Parameters", open=False):
205
+ moe = gr.Checkbox(
206
+ label="MoE",
207
+ value=False,
208
+ info="Whether the model is MoE"
209
+ )
210
+ num_experts = gr.Number(
211
+ label="Number of Experts",
212
+ value=8,
213
+ info="Number of experts for MoE"
214
+ )
215
+ expert_interval = gr.Number(
216
+ label="Expert Interval",
217
+ value=1,
218
+ info="Expert interval for MoE"
219
+ )
220
+ topk = gr.Number(
221
+ label="Top k Routing",
222
+ value=1,
223
+ info="Top k routing for MoE"
224
+ )
225
 
226
  calc_param_button = gr.Button("Calculate Parameters")
227
  param_result = gr.Textbox(label="Parameter Calculation Result", interactive=False)