lvwerra HF staff commited on
Commit
2cddfc6
1 Parent(s): 13461c8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -7
app.py CHANGED
@@ -78,23 +78,31 @@ def compute(N, D):
78
 
79
  text = f"""\
80
  ## Compute:
81
- Compute budget (TFLOPs): {C:.2E}
 
 
82
 
83
  ## Chinchilla optimal:
84
- Optimal model size:\t\t {N_opt/Bn:.2f}B
 
 
85
 
86
- Optimal datset size (tokens):\t {D_opt/Bn:.2f}
87
 
88
  ## Your setting trade-off:
89
- Training compute overhead:\t {100*compute_overhead(kn, kd):.2f}%
90
 
91
- Inference cost fraction:\t {kn*100:.2f}%"""
 
 
92
  return text, fig
93
 
94
  with gr.Blocks() as demo:
95
  gr.Markdown(INTRO)
96
- N = gr.Number(value=1, label="Model size (in B parameters):")
97
- D = gr.Number(value=100, label="Dataset size (in B tokens):")
 
 
98
  button = gr.Button("Compute!")
99
 
100
  plot = gr.Plot(value=plt)
 
78
 
79
  text = f"""\
80
  ## Compute:
81
+ Your specified setting corresponds to the following training compute budget.
82
+
83
+ **Compute budget (TFLOPs): {C:.2E}**
84
 
85
  ## Chinchilla optimal:
86
+ If you are optimizing for model performance and ignoring inference cost, this is the optimal setting for training:
87
+
88
+ **Optimal model size: {N_opt/Bn:.2f}B**
89
 
90
+ **Optimal dataset size (tokens): {D_opt/Bn:.2f}**
91
 
92
  ## Your setting trade-off:
93
+ In both cases 100% corresponds to the compute optimal model.
94
 
95
+ **Training compute overhead: {100*compute_overhead(kn, kd):.2f}%**
96
+
97
+ **Inference cost fraction: {kn*100:.2f}%**"""
98
  return text, fig
99
 
100
  with gr.Blocks() as demo:
101
  gr.Markdown(INTRO)
102
+ with gr.Row():
103
+ N = gr.Number(value=1, label="Model size (in B parameters):")
104
+ D = gr.Number(value=100, label="Dataset size (in B tokens):")
105
+
106
  button = gr.Button("Compute!")
107
 
108
  plot = gr.Plot(value=plt)