Space status: Runtime error

Commit bdbd790 (parent: 2beb7b1): Update app.py

app.py CHANGED
@@ -65,10 +65,36 @@ with gr.Blocks() as demo:
     with gr.Tabs():
         # Memory Calculation Tab
         with gr.TabItem("Memory Calculation"):
-
-
-
-
+            with gr.group():
+                hf_model_name_or_path = gr.Textbox(
+                    label="HuggingFace Model Name or Path",
+                    info="Name of the HuggingFace Hub repository or the local file path for it"
+                )
+                sequence_length = gr.Number(
+                    label="Sequence Length",
+                    value=2048,
+                    info="Sequence length used for training"
+                )
+                vocab_size = gr.Number(
+                    label="Vocab Size",
+                    value=51200,
+                    info="How many tokens are in the embedding layer"
+                )
+                hidden_size = gr.Number(
+                    label="Hidden Size",
+                    value=6144,
+                    info="Dimension of the model's hidden size"
+                )
+                num_attention_heads = gr.Number(
+                    label="Number of Attention Heads",
+                    value=64,
+                    info="Number of attention heads used in the model"
+                )
+                num_layers = gr.Number(
+                    label="Number of Layers",
+                    value=44,
+                    info="Number of transformer layers used in the model"
+                )
             num_gpus = gr.Number(
                 label="Number of GPUs",
                 value=1,
@@ -89,31 +115,6 @@ with gr.Blocks() as demo:
                 value=8,
                 info="Batch size per GPU"
             )
-            sequence_length = gr.Number(
-                label="Sequence Length",
-                value=2048,
-                info="Sequence length used for training"
-            )
-            vocab_size = gr.Number(
-                label="Vocab Size",
-                value=51200,
-                info="How many tokens are in the embedding layer"
-            )
-            hidden_size = gr.Number(
-                label="Hidden Size",
-                value=6144,
-                info="Dimension of the model's hidden size"
-            )
-            num_attention_heads = gr.Number(
-                label="Number of Attention Heads",
-                value=64,
-                info="Number of attention heads used in the model"
-            )
-            num_layers = gr.Number(
-                label="Number of Layers",
-                value=44,
-                info="Number of transformer layers used in the model"
-            )
             ffn_expansion_factor = gr.Number(
                 label="FFN Expansion Factor",
                 value=4,
@@ -148,35 +149,36 @@ with gr.Blocks() as demo:
 
         # Parameter Calculation Tab
         with gr.TabItem("Parameter Calculation"):
-
-
-
-
-
-
-
-
-
+            with gr.group():
+                hf_model_name_or_path = gr.Textbox(
+                    label="HuggingFace Model Name or Path",
+                    info="Name of the HuggingFace Hub repository or the local file path for it"
+                )
+                vocab_size = gr.Number(
+                    label="Vocab Size",
+                    value=51200,
+                    info="How many tokens are in the embedding layer"
+                )
+                hidden_size = gr.Number(
+                    label="Hidden Size",
+                    value=6144,
+                    info="Dimension of the model's hidden size"
+                )
+                sequence_length = gr.Number(
+                    label="Sequence Length",
+                    value=2048,
+                    info="Sequence length used for training"
+                )
+                num_layers = gr.Number(
+                    label="Number of Layers",
+                    value=44,
+                    info="Number of transformer layers used in the model"
+                )
             tied_embeddings = gr.Checkbox(
                 label="Tied Embeddings",
                 value=False,
                 info="Whether embeddings are tied (shared between input and output)"
             )
-            hidden_size = gr.Number(
-                label="Hidden Size",
-                value=6144,
-                info="Dimension of the model's hidden size"
-            )
-            sequence_length = gr.Number(
-                label="Sequence Length",
-                value=2048,
-                info="Sequence length used for training"
-            )
-            num_layers = gr.Number(
-                label="Number of Layers",
-                value=44,
-                info="Number of transformer layers used in the model"
-            )
             ffn_expansion_factor = gr.Number(
                 label="FFN Expansion Factor",
                 value=4,
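A note on the new wrapper: the added lines call gr.group() in lowercase. Gradio's layout container is gr.Group (capitalized), and a lowercase gr.group is not a documented attribute of the gradio module, so building the Blocks would raise an AttributeError at startup, which would be consistent with the Space's "Runtime error" status. A minimal sketch of the grouping as presumably intended, shortened to two of the widgets from the diff:

import gradio as gr

with gr.Blocks() as demo:
    with gr.Tabs():
        # Memory Calculation Tab
        with gr.TabItem("Memory Calculation"):
            # gr.Group (capital G) is the Gradio container; gr.group() is not defined.
            with gr.Group():
                hf_model_name_or_path = gr.Textbox(
                    label="HuggingFace Model Name or Path",
                    info="Name of the HuggingFace Hub repository or the local file path for it",
                )
                sequence_length = gr.Number(
                    label="Sequence Length",
                    value=2048,
                    info="Sequence length used for training",
                )

demo.launch()

gr.Group renders its children as one visual unit with no spacing between them, which matches the apparent intent of collecting the model hyperparameter inputs together on each tab.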
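The commit only adds the hf_model_name_or_path Textbox; nothing shown here wires it to the numeric fields. If the intent is to prefill those fields from a Hub repository or a local checkpoint, a hypothetical helper (an assumption for illustration, not code from this commit) could read the model's configuration with transformers.AutoConfig:

from transformers import AutoConfig

def load_model_hyperparams(hf_model_name_or_path: str) -> dict:
    # Reads config.json from the Hub (or a local path) without downloading weights.
    config = AutoConfig.from_pretrained(hf_model_name_or_path)
    # Attribute names follow common HF configs (e.g. GPT-NeoX-style models);
    # some architectures name these fields differently.
    return {
        "vocab_size": config.vocab_size,
        "hidden_size": config.hidden_size,
        "num_attention_heads": config.num_attention_heads,
        "num_layers": config.num_hidden_layers,
        "sequence_length": config.max_position_embeddings,
    }

Such a helper could then back a button's click handler that returns updated values for the corresponding gr.Number components.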