lvwerra HF staff commited on
Commit
2cddfc6
1 Parent(s): 13461c8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -7
app.py CHANGED
@@ -78,23 +78,31 @@ def compute(N, D):
78
 
79
  text = f"""\
80
  ## Compute:
81
- Compute budget (TFLOPs): {C:.2E}
 
 
82
 
83
  ## Chinchilla optimal:
84
- Optimal model size:\t\t {N_opt/Bn:.2f}B
 
 
85
 
86
- Optimal datset size (tokens):\t {D_opt/Bn:.2f}
87
 
88
  ## Your setting trade-off:
89
- Training compute overhead:\t {100*compute_overhead(kn, kd):.2f}%
90
 
91
- Inference cost fraction:\t {kn*100:.2f}%"""
 
 
92
  return text, fig
93
 
94
  with gr.Blocks() as demo:
95
  gr.Markdown(INTRO)
96
- N = gr.Number(value=1, label="Model size (in B parameters):")
97
- D = gr.Number(value=100, label="Dataset size (in B tokens):")
 
 
98
  button = gr.Button("Compute!")
99
 
100
  plot = gr.Plot(value=plt)
 
78
 
79
  text = f"""\
80
  ## Compute:
81
+ Your specified setting corresponds to the following training compute budget.
82
+
83
+ **Compute budget (TFLOPs): {C:.2E}**
84
 
85
  ## Chinchilla optimal:
86
+ If you are optimizing for model performance and ignoring inference cost, this is the optimal setting for training:
87
+
88
+ **Optimal model size: {N_opt/Bn:.2f}B**
89
 
90
+ **Optimal dataset size (tokens): {D_opt/Bn:.2f}**
91
 
92
  ## Your setting trade-off:
93
+ In both cases 100% corresponds to the compute optimal model.
94
 
95
+ **Training compute overhead: {100*compute_overhead(kn, kd):.2f}%**
96
+
97
+ **Inference cost fraction: {kn*100:.2f}%**"""
98
  return text, fig
99
 
100
  with gr.Blocks() as demo:
101
  gr.Markdown(INTRO)
102
+ with gr.Row():
103
+ N = gr.Number(value=1, label="Model size (in B parameters):")
104
+ D = gr.Number(value=100, label="Dataset size (in B tokens):")
105
+
106
  button = gr.Button("Compute!")
107
 
108
  plot = gr.Plot(value=plt)