lvwerra HF staff committed on
Commit
ed4871a
1 Parent(s): 4cf381c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -2
app.py CHANGED
@@ -12,8 +12,8 @@ beta = 0.283
12
  Bn = 10**9
13
 
14
  G = ((alpha*A)/(beta*B))**(1/(alpha+beta))
15
- ###
16
 
 
17
  def to_flops(N, D):
18
  return 6 * N * D
19
 
@@ -80,7 +80,10 @@ Inference cost fraction:\t {kn*100:.2f}%"""
80
  return text, fig
81
 
82
  with gr.Blocks() as demo:
83
- gr.Markdown("# Harm's law")
 
 
 
84
  N = gr.Number(value=1, label="Model size (in B parameters):")
85
  D = gr.Number(value=100, label="Dataset size (in B tokens):")
86
  button = gr.Button("Compute!")
 
12
  Bn = 10**9
13
 
14
  G = ((alpha*A)/(beta*B))**(1/(alpha+beta))
 
15
 
16
+ ### FUNCTIONS
17
  def to_flops(N, D):
18
  return 6 * N * D
19
 
 
80
  return text, fig
81
 
82
  with gr.Blocks() as demo:
83
+ gr.Markdown("# Harm's law\
84
+ The Chinchilla scaling laws focus on optimally scaling training compute but often we also care about inference cost.
85
+ This tool follows [Harm de Vries' blog post](https://www.harmdevries.com/post/model-size-vs-compute-overhead/) and visualizes the tradeoff between training compute and inference cost (i.e. model size).
86
+ ")
87
  N = gr.Number(value=1, label="Model size (in B parameters):")
88
  D = gr.Number(value=100, label="Dataset size (in B tokens):")
89
  button = gr.Button("Compute!")