lvwerra HF staff committed on
Commit
ed4871a
1 Parent(s): 4cf381c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -2
app.py CHANGED
@@ -12,8 +12,8 @@ beta = 0.283
12
  Bn = 10**9
13
 
14
  G = ((alpha*A)/(beta*B))**(1/(alpha+beta))
15
- ###
16
 
 
17
  def to_flops(N, D):
18
  return 6 * N * D
19
 
@@ -80,7 +80,10 @@ Inference cost fraction:\t {kn*100:.2f}%"""
80
  return text, fig
81
 
82
  with gr.Blocks() as demo:
83
- gr.Markdown("# Harm's law")
 
 
 
84
  N = gr.Number(value=1, label="Model size (in B parameters):")
85
  D = gr.Number(value=100, label="Dataset size (in B tokens):")
86
  button = gr.Button("Compute!")
 
12
  Bn = 10**9
13
 
14
  G = ((alpha*A)/(beta*B))**(1/(alpha+beta))
 
15
 
16
+ ### FUNCTIONS
17
  def to_flops(N, D):
18
  return 6 * N * D
19
 
 
80
  return text, fig
81
 
82
  with gr.Blocks() as demo:
83
+ gr.Markdown("# Harm's law\
84
+ The Chinchilla scaling laws focus on optimally scaling training compute but often we also care about inference cost.
85
+ This tool follows [Harm de Vries' blog post](https://www.harmdevries.com/post/model-size-vs-compute-overhead/) and visualizes the tradeoff between training compute and inference cost (i.e. model size).
86
+ ")
87
  N = gr.Number(value=1, label="Model size (in B parameters):")
88
  D = gr.Number(value=100, label="Dataset size (in B tokens):")
89
  button = gr.Button("Compute!")