Fix the calculation of bubble rate
Browse files- app.py +15 -7
- description1.md +5 -5
app.py
CHANGED
@@ -58,12 +58,16 @@ def get_schedule_image(result, max_time):
|
|
58 |
|
59 |
|
60 |
def calculate(p, m, f, b, w, c, mem):
|
|
|
|
|
|
|
61 |
baseline_result = hand_schedule.get_hand_schedule(p, m, f, b + w, 0, c)
|
62 |
baseline_result = [
|
63 |
list(filter(lambda x: x.type in {'F', 'B'}, r)) for r in baseline_result
|
64 |
]
|
65 |
baseline_time = get_schedule_time(baseline_result)
|
66 |
-
baseline_bubble=percentage(baseline_time/(f+b+w)/m - 1)
|
|
|
67 |
baseline_mem = get_memory_usage(baseline_result)
|
68 |
baseline_acceleration=percentage(0)
|
69 |
|
@@ -76,7 +80,8 @@ def calculate(p, m, f, b, w, c, mem):
|
|
76 |
|
77 |
adapt_time = get_schedule_time(adapt_result)
|
78 |
adapt_mem = get_memory_usage(adapt_result) / 2
|
79 |
-
adapt_bubble=percentage(adapt_time/(f+b+w)/m - 1)
|
|
|
80 |
adapt_acceleration=percentage(baseline_time/adapt_time - 1) if baseline_time is not None else None
|
81 |
|
82 |
schedule1f1bv_result = schedule1f1bv.schedule(
|
@@ -87,7 +92,8 @@ def calculate(p, m, f, b, w, c, mem):
|
|
87 |
|
88 |
schedule1f1bv_time = get_schedule_time(schedule1f1bv_result)
|
89 |
schedule1f1bv_mem = get_memory_usage(schedule1f1bv_result) / 2
|
90 |
-
schedule1f1bv_bubble=percentage(schedule1f1bv_time/(f+b+w)/m - 1)
|
|
|
91 |
schedule1f1bv_acceleration=percentage(baseline_time/schedule1f1bv_time - 1) if baseline_time is not None else None
|
92 |
|
93 |
type2_result = type2.schedule(
|
@@ -98,7 +104,8 @@ def calculate(p, m, f, b, w, c, mem):
|
|
98 |
|
99 |
type2_time = get_schedule_time(type2_result)
|
100 |
type2_mem = get_memory_usage(type2_result)
|
101 |
-
type2_bubble=percentage(type2_time/(f+b+w)/m - 1)
|
|
|
102 |
type2_acceleration=percentage(baseline_time/type2_time - 1) if baseline_time is not None else None
|
103 |
|
104 |
interleaved_result = interleaved_variant.get_interleaved_variation(
|
@@ -109,7 +116,8 @@ def calculate(p, m, f, b, w, c, mem):
|
|
109 |
|
110 |
interleaved_time = get_schedule_time(interleaved_result)
|
111 |
interleaved_mem = get_memory_usage(interleaved_result) / 2
|
112 |
-
interleaved_bubble=percentage(interleaved_time/(f+b+w)/m - 1)
|
|
|
113 |
interleaved_acceleration=percentage(baseline_time/interleaved_time - 1) if baseline_time is not None else None
|
114 |
|
115 |
|
@@ -218,7 +226,7 @@ with gr.Blocks() as demo:
|
|
218 |
with gr.Column(scale=1):
|
219 |
type2_acceleration=gr.Textbox("", label="Acceleration compared to 1F1B")
|
220 |
type2_mem=gr.Textbox("", label="Maximum memory usage")
|
221 |
-
type2_bubble=gr.Textbox("", label="Bubble Rate
|
222 |
with gr.Column(scale=4):
|
223 |
type2_image=gr.Image(None, interactive=False, label="Schedule Image", show_label=False)
|
224 |
with gr.Group():
|
@@ -227,7 +235,7 @@ with gr.Blocks() as demo:
|
|
227 |
with gr.Column(scale=1):
|
228 |
interleaved_acceleration=gr.Textbox("", label="Acceleration compared to 1F1B")
|
229 |
interleaved_mem=gr.Textbox("", label="Maximum memory usage")
|
230 |
-
interleaved_bubble=gr.Textbox("", label="Bubble Rate
|
231 |
with gr.Column(scale=4):
|
232 |
interleaved_image=gr.Image(None, interactive=False, label="Schedule Image", show_label=False)
|
233 |
button.click(calculate, inputs=[p, m, f, b, w, c, mem], outputs=[baseline_acceleration, baseline_mem, baseline_bubble, baseline_image,
|
|
|
58 |
|
59 |
|
60 |
def calculate(p, m, f, b, w, c, mem):
|
61 |
+
def get_bubble_rate(_time):
|
62 |
+
return 1 - ((f + b + w) * m / _time)
|
63 |
+
|
64 |
baseline_result = hand_schedule.get_hand_schedule(p, m, f, b + w, 0, c)
|
65 |
baseline_result = [
|
66 |
list(filter(lambda x: x.type in {'F', 'B'}, r)) for r in baseline_result
|
67 |
]
|
68 |
baseline_time = get_schedule_time(baseline_result)
|
69 |
+
# baseline_bubble=percentage(baseline_time/(f+b+w)/m - 1)
|
70 |
+
baseline_bubble=percentage(get_bubble_rate(baseline_time))
|
71 |
baseline_mem = get_memory_usage(baseline_result)
|
72 |
baseline_acceleration=percentage(0)
|
73 |
|
|
|
80 |
|
81 |
adapt_time = get_schedule_time(adapt_result)
|
82 |
adapt_mem = get_memory_usage(adapt_result) / 2
|
83 |
+
# adapt_bubble=percentage(adapt_time/(f+b+w)/m - 1)
|
84 |
+
adapt_bubble=percentage(get_bubble_rate(adapt_time))
|
85 |
adapt_acceleration=percentage(baseline_time/adapt_time - 1) if baseline_time is not None else None
|
86 |
|
87 |
schedule1f1bv_result = schedule1f1bv.schedule(
|
|
|
92 |
|
93 |
schedule1f1bv_time = get_schedule_time(schedule1f1bv_result)
|
94 |
schedule1f1bv_mem = get_memory_usage(schedule1f1bv_result) / 2
|
95 |
+
# schedule1f1bv_bubble=percentage(schedule1f1bv_time/(f+b+w)/m - 1)
|
96 |
+
schedule1f1bv_bubble=percentage(get_bubble_rate(schedule1f1bv_time))
|
97 |
schedule1f1bv_acceleration=percentage(baseline_time/schedule1f1bv_time - 1) if baseline_time is not None else None
|
98 |
|
99 |
type2_result = type2.schedule(
|
|
|
104 |
|
105 |
type2_time = get_schedule_time(type2_result)
|
106 |
type2_mem = get_memory_usage(type2_result)
|
107 |
+
# type2_bubble=percentage(type2_time/(f+b+w)/m - 1)
|
108 |
+
type2_bubble=percentage(get_bubble_rate(type2_time))
|
109 |
type2_acceleration=percentage(baseline_time/type2_time - 1) if baseline_time is not None else None
|
110 |
|
111 |
interleaved_result = interleaved_variant.get_interleaved_variation(
|
|
|
116 |
|
117 |
interleaved_time = get_schedule_time(interleaved_result)
|
118 |
interleaved_mem = get_memory_usage(interleaved_result) / 2
|
119 |
+
# interleaved_bubble=percentage(interleaved_time/(f+b+w)/m - 1)
|
120 |
+
interleaved_bubble=percentage(get_bubble_rate(interleaved_time))
|
121 |
interleaved_acceleration=percentage(baseline_time/interleaved_time - 1) if baseline_time is not None else None
|
122 |
|
123 |
|
|
|
226 |
with gr.Column(scale=1):
|
227 |
type2_acceleration=gr.Textbox("", label="Acceleration compared to 1F1B")
|
228 |
type2_mem=gr.Textbox("", label="Maximum memory usage")
|
229 |
+
type2_bubble=gr.Textbox("", label="Bubble Rate")
|
230 |
with gr.Column(scale=4):
|
231 |
type2_image=gr.Image(None, interactive=False, label="Schedule Image", show_label=False)
|
232 |
with gr.Group():
|
|
|
235 |
with gr.Column(scale=1):
|
236 |
interleaved_acceleration=gr.Textbox("", label="Acceleration compared to 1F1B")
|
237 |
interleaved_mem=gr.Textbox("", label="Maximum memory usage")
|
238 |
+
interleaved_bubble=gr.Textbox("", label="Bubble Rate")
|
239 |
with gr.Column(scale=4):
|
240 |
interleaved_image=gr.Image(None, interactive=False, label="Schedule Image", show_label=False)
|
241 |
button.click(calculate, inputs=[p, m, f, b, w, c, mem], outputs=[baseline_acceleration, baseline_mem, baseline_bubble, baseline_image,
|
description1.md
CHANGED
@@ -7,10 +7,10 @@ From our findings, we need approximately 1/3 memory under ideal conditions (F, B
|
|
7 |
Check out our paper at [Arxiv](https://arxiv.org/abs/2405.15362).
|
8 |
|
9 |
|
10 |
-
| Comparison assuming T_F=T_B=T_W | 1F1B
|
11 |
-
| -----------------------------------------------------
|
12 |
-
| Bubble Rate |
|
13 |
-
| Activation Memory <br> (Compared to 1F1B) |
|
14 |
|
15 |
|
16 |
-
Bubble Rate here is calculated as
|
|
|
7 |
Check out our paper at [Arxiv](https://arxiv.org/abs/2405.15362).
|
8 |
|
9 |
|
10 |
+
| Comparison assuming T_F=T_B=T_W | 1F1B | V-Min | V-Half | V-ZB |
|
11 |
+
| ----------------------------------------------------- |-------|------- | ---------- | ---- |
|
12 |
+
| Bubble Rate | ~ p/m | ~ 2p/(3m) | ~ p/(2m) | 0 |
|
13 |
+
| Activation Memory <br> (Compared to 1F1B) | p | (p+4)/3 | (p+2)/2 | p |
|
14 |
|
15 |
|
16 |
+
Bubble Rate here is calculated as `1 - (F+B+W)*m / longest_stage_time`.
|