# video-promo/continuous_batching.py

"""AstrAI promo: Continuous Batching — state-machine driven batch rotation.
Shows a 4-state FSM (Cleanup → Refill → Prefill → Decode → Loop → Cleanup)
with coloured batch tokens flowing through states, entering & leaving continuously.
"""
from manim import *
# Set the default font used by Text mobjects created in this module.
Text.set_default(font="Times New Roman")

# ── palette ──
# Colour of each scheduler state, keyed by the state's display name.
PHASE_COLORS = dict(
    Cleanup=GRAY,
    Refill=ORANGE,
    Prefill=BLUE,
    Decode=YELLOW,
)

# Colours assigned to batch tokens; indexed by token order.
BATCH_COLORS = [
    YELLOW, ORANGE, PINK, TEAL,
    GREEN, PURPLE, GOLD, MAROON,
]
class ContinuousBatching(Scene):
    """Promo scene that walks through continuous batching, section by section.

    ``construct`` plays, in order: a title card, a four-state scheduler
    diagram (Cleanup, Refill, Prefill, Decode) with batch tokens rotating
    through it, a position-grouped decode highlight, a bitmask slot-allocation
    demo, a static-vs-continuous Gantt comparison, and a throughput bar chart.
    """

    def construct(self):
        """Play every section of the promo in sequence."""
        title, bar = self._show_title()
        states = self._build_state_machine()
        self._rotate_tokens(states)
        self._show_position_grouped_decode(states)
        self._show_bitmask_allocation(states)
        self._clear_stage(keep=(title, bar))
        self._show_gantt_comparison(header=(title, bar))
        self._show_throughput_comparison()

    # ═══════════════════════════════════════════════════
    # 0. Title
    # ═══════════════════════════════════════════════════
    def _show_title(self):
        """Write the headline, dock it at the top edge and underline it.

        Returns:
            The ``(title, underline)`` pair, so later sections can keep them
            on screen while clearing everything else.
        """
        title = Text("Continuous Batching", font_size=48, color=BLUE)
        self.play(Write(title))
        self.wait(0.4)
        self.play(title.animate.to_edge(UP).scale(0.55))
        bar = Line(LEFT * 7, RIGHT * 7, color=GRAY).next_to(title, DOWN, buff=0.15)
        self.play(Create(bar))
        return title, bar

    # ═══════════════════════════════════════════════════
    # 1. State-machine layout (vertical, 4 states)
    # ═══════════════════════════════════════════════════
    def _build_state_machine(self):
        """Draw the four state boxes, the arrows between them and the loop arrow.

        Returns:
            A ``VGroup`` of four ``VGroup(box, label)`` entries ordered
            Cleanup, Refill, Prefill, Decode.
        """
        state_names = ["Cleanup", "Refill", "Prefill", "Decode"]
        states = VGroup()
        for name in state_names:
            box = RoundedRectangle(
                width=3.6, height=0.8, corner_radius=0.15,
                color=PHASE_COLORS[name], fill_opacity=0.12, stroke_width=2.5,
            )
            lbl = Text(name, font_size=20, color=PHASE_COLORS[name])
            states.add(VGroup(box, lbl))
        states.arrange(DOWN, buff=0.3)
        states.shift(LEFT * 3.8 + DOWN * 0.5)

        # One arrow from the bottom of each box to the top of the next one.
        trans_arrows = VGroup()
        for i in range(1, len(states)):
            trans_arrows.add(
                Arrow(
                    states[i - 1].get_bottom(), states[i].get_top(),
                    color=LIGHT_GRAY, buff=0.06,
                    max_tip_length_to_length_ratio=0.22,
                )
            )

        # Reveal top-down: each box, then the arrow leading into it.
        for i, state in enumerate(states):
            self.play(Create(state))
            if i > 0:
                self.play(Create(trans_arrows[i - 1]))

        # loop arrow — Decode returns to Cleanup (multiturn decoding)
        loop = CurvedArrow(
            states[-1].get_right() + RIGHT * 0.2,
            states[0].get_right() + RIGHT * 0.2,
            color=LIGHT_GRAY, angle=PI / 2,
        )
        loop_lbl = Text("per token", font_size=11, color=GRAY).next_to(
            loop, RIGHT, buff=0.08
        )
        self.play(Create(loop), Write(loop_lbl))
        self.wait(0.4)
        return states

    @staticmethod
    def _make_token(name, col):
        """Return a small rounded card labelled ``name``, drawn in colour ``col``.

        Args:
            name: Text shown on the card.
            col: Colour passed to both the card outline/fill and the label
                (the callers pass entries of ``BATCH_COLORS``).
        """
        card = RoundedRectangle(
            width=0.65, height=0.38, corner_radius=0.08,
            color=col, fill_opacity=0.35, stroke_width=1.8,
        )
        txt = Text(name, font_size=13, color=col)
        return VGroup(card, txt)

    # ═══════════════════════════════════════════════════
    # 2-6. Boot tokens, then four rotation ticks
    # ═══════════════════════════════════════════════════
    def _rotate_tokens(self, states):
        """Animate batch tokens cycling through the state machine.

        Tokens A, B, C start at Prefill, Decode and Cleanup. On each tick the
        three live tokens advance one state, the one that reached Refill
        fades out, and a new token fades in at Prefill.

        Args:
            states: The four state groups ordered Cleanup, Refill, Prefill,
                Decode.
        """
        # Token slots sit a fixed distance to the right of each state box.
        cleanup_slot, refill_slot, prefill_slot, decode_slot = (
            state.get_center() + RIGHT * 1.5 for state in states
        )

        # all three at consecutive stages, Prefill is the entry point
        in_prefill = self._make_token("A", BATCH_COLORS[0])
        in_decode = self._make_token("B", BATCH_COLORS[1])
        in_cleanup = self._make_token("C", BATCH_COLORS[2])
        in_prefill.move_to(prefill_slot)
        in_decode.move_to(decode_slot)
        in_cleanup.move_to(cleanup_slot)
        for token in (in_prefill, in_decode, in_cleanup):
            self.play(FadeIn(token, scale=0.7), run_time=0.25)
        self.wait(0.2)

        note = Text("Every request starts at Prefill", font_size=16, color=WHITE)
        note.next_to(states, DOWN, buff=0.55)
        self.play(Write(note))
        self.wait(1.0)
        self.play(FadeOut(note))

        # (new token name, colour, pause after the move, pause after arrival)
        arrivals = (
            ("D", BATCH_COLORS[3], 0.3, 0.25),   # tick 1: C exits, D enters
            ("E", BATCH_COLORS[4], 0.3, 0.25),   # tick 2: B exits, E enters
            ("F", BATCH_COLORS[5], 0.25, 0.25),  # tick 3: A exits, F enters
            ("G", BATCH_COLORS[6], 0.25, 0.35),  # tick 4: D exits, G enters
        )
        for name, col, settle, pause in arrivals:
            self.play(
                in_prefill.animate.move_to(decode_slot),   # Prefill → Decode
                in_decode.animate.move_to(cleanup_slot),   # Decode → Cleanup
                in_cleanup.animate.move_to(refill_slot),   # Cleanup → Refill
            )
            self.wait(settle)
            # The token that just reached Refill leaves the pipeline ...
            self.play(FadeOut(in_cleanup, scale=0.6))
            # ... and a fresh one enters at Prefill.
            newcomer = self._make_token(name, col)
            newcomer.move_to(prefill_slot)
            self.play(FadeIn(newcomer, scale=0.7))
            self.wait(pause)
            in_prefill, in_decode, in_cleanup = newcomer, in_prefill, in_decode

        # drop note: constant throughput, all enter at Prefill
        flow_note = Text(
            "All requests enter at Prefill — pipeline never drains",
            font_size=15, color=GREEN,
        ).next_to(states, DOWN, buff=0.55)
        self.play(Write(flow_note))
        self.wait(1.5)
        self.play(FadeOut(flow_note))

        # Clear only the tokens still on screen. Playing FadeOut on a token
        # that already left would put it back on the scene before fading it.
        self.play(*[FadeOut(t) for t in (in_prefill, in_decode, in_cleanup)])

    # ═══════════════════════════════════════════════════
    # 7. Position-Grouped Decode highlight
    # ═══════════════════════════════════════════════════
    def _show_position_grouped_decode(self, states):
        """Show four tokens clustered beside Decode and ring the Decode box.

        Args:
            states: The four state groups; index 3 is the Decode box.
        """
        decode_state = states[3]
        d_pos = decode_state.get_center()
        positions = [
            d_pos + RIGHT * 1.2 + UP * 0.45,
            d_pos + RIGHT * 1.2,
            d_pos + RIGHT * 2.5 + UP * 0.45,
            d_pos + RIGHT * 2.5,
        ]
        d_tokens = [
            self._make_token("T" + str(i), BATCH_COLORS[i])
            for i in range(len(positions))
        ]
        for token, pos in zip(d_tokens, positions):
            token.move_to(pos)
            self.play(FadeIn(token, scale=0.6), run_time=0.2)

        ring = SurroundingRectangle(
            decode_state, color=YELLOW, buff=0.12, stroke_width=3
        )
        ring_txt = Text(
            "Position-Grouped Batching\nSame decode position → single matmul",
            font_size=14, color=YELLOW, line_spacing=0.6,
        ).next_to(decode_state, DOWN, buff=0.5)
        self.play(Create(ring), Write(ring_txt))
        self.wait(2.0)
        self.play(
            FadeOut(ring), FadeOut(ring_txt),
            *[FadeOut(t) for t in d_tokens],
        )

    # ═══════════════════════════════════════════════════
    # 8. O(1) Bitmask Slot Allocation
    # ═══════════════════════════════════════════════════
    def _show_bitmask_allocation(self, states):
        """Draw a 16-bit occupancy row and morph it into its complement.

        Args:
            states: The state groups; captions are placed below them.
        """
        bitmask_title = Text(
            "O(1) Slot Allocation via Bitmask", font_size=22, color=ORANGE
        ).next_to(states, DOWN, buff=0.75)
        bitmask_desc = Text(
            "free_slots = ~occupied_mask (one-clock op)",
            font_size=15, color=GRAY,
        ).next_to(bitmask_title, DOWN, buff=0.15)
        self.play(Write(bitmask_title), Write(bitmask_desc))
        self.wait(1.5)

        bit_size = 0.18
        occupied = (2, 5, 9, 13)  # bit indices drawn filled in the first row

        def new_bit(filled):
            """Return one bit square, half-filled with gray when ``filled``."""
            square = Square(
                side_length=bit_size * 2, color=GRAY,
                fill_opacity=0.0, stroke_width=1.2,
            )
            if filled:
                square.set_fill(GRAY, opacity=0.5)
            return square

        # animate bitmask bits flipping
        bits_group = VGroup(*[new_bit(i in occupied) for i in range(16)])
        bits_group.arrange(RIGHT, buff=0.06)
        bits_group.next_to(bitmask_desc, DOWN, buff=0.3)
        occupied_lbl = Text("occupied_mask", font_size=11, color=RED).next_to(
            bits_group, LEFT, buff=0.4
        )
        self.play(Create(bits_group), Write(occupied_lbl))

        # flip to ~occupied: same square positions, inverted fill pattern
        flipped = VGroup(*[
            new_bit(i not in occupied).move_to(sq)
            for i, sq in enumerate(bits_group)
        ])
        free_lbl = Text("free_slots", font_size=11, color=GREEN)
        free_lbl.next_to(flipped, LEFT, buff=0.4).align_to(occupied_lbl, LEFT)
        self.play(
            Transform(bits_group, flipped),
            Transform(occupied_lbl, free_lbl),
        )
        self.wait(1.2)
        self.play(
            FadeOut(bits_group), FadeOut(occupied_lbl),
            FadeOut(bitmask_title), FadeOut(bitmask_desc),
        )

    def _clear_stage(self, keep):
        """Fade out every mobject on the scene except those in ``keep``.

        Each mobject is faded exactly once: a second FadeOut on something
        already removed would re-add it to the scene and make it flash back.

        Args:
            keep: Mobjects (compared by identity) that must stay on screen.
        """
        leftovers = [
            m for m in self.mobjects
            if not any(m is kept for kept in keep)
        ]
        if leftovers:  # Scene.play rejects an empty animation list
            self.play(*[FadeOut(m) for m in leftovers])
        self.wait(0.2)

    # ═══════════════════════════════════════════════════
    # 9. Gantt timeline comparison — Static vs Continuous
    # ═══════════════════════════════════════════════════
    def _show_gantt_comparison(self, header):
        """Draw the static and continuous Gantt panels side by side.

        Both panels show a GPU row and the same five requests; everything
        drawn here, plus ``header``, is faded out at the end.

        Args:
            header: Mobjects already on screen (title and underline) that
                should be faded out together with the Gantt panels.
        """
        # ── layout constants ──
        cell = 0.44             # width per time tick
        bar_h = 0.32            # bar height
        row = bar_h + 0.10      # row pitch: bar height + gap between rows
        ticks = 12              # time columns
        panel_w = ticks * cell  # panel width
        left_x = -5.8           # left-panel origin x
        right_x = 1.0           # right-panel origin x
        top_y = 2.0             # gantt top y (time axis)
        gpu_y = top_y - row     # GPU row sits one row below the axis

        def gbox(ox, y, start, span, color, fill=0.75):
            """Return a filled bar covering ticks [start, start + span) on row y."""
            x = ox + start * cell
            w = span * cell
            return Rectangle(
                width=w, height=bar_h, color=color,
                fill_opacity=fill, stroke_width=0,
            ).move_to([x + w / 2, y, 0])

        def batch_box(ox, y_gpu, y_last_req, start, span, color, label_txt):
            """Return (outline, label) framing rows y_gpu..y_last_req over a tick span."""
            w = span * cell
            top = y_gpu + bar_h / 2 + 0.06
            bot = y_last_req - bar_h / 2 - 0.06
            rect = Rectangle(
                width=w, height=top - bot, color=color,
                stroke_width=1.8, fill_opacity=0.04,
            )
            rect.move_to([ox + (start + span / 2) * cell, (top + bot) / 2, 0])
            lbl = Text(label_txt, font_size=12, color=color).next_to(
                rect, UP, buff=0.06
            )
            return rect, lbl

        def taxis(ox, ty):
            """Return a time axis: base line, a tick per column, a number every 3."""
            line = Line(
                [ox, ty, 0], [ox + panel_w, ty, 0],
                color=GRAY, stroke_width=1.2,
            )
            ticks_vg = VGroup()
            for t in range(ticks + 1):
                mark = Line(DOWN * 0.06, UP * 0.06, color=GRAY, stroke_width=0.8)
                mark.move_to([ox + t * cell, ty, 0])
                ticks_vg.add(mark)
            nums_vg = VGroup()
            for t in range(0, ticks + 1, 3):
                nums_vg.add(
                    Text(str(t), font_size=11, color=GRAY).next_to(
                        [ox + t * cell, ty, 0], DOWN, buff=0.10,
                    )
                )
            return VGroup(line, ticks_vg, nums_vg)

        # ── Left: Static Batching ──
        s_title = Text("Static Batching", font_size=26, color=RED)
        s_title.move_to([left_x + panel_w / 2, top_y + 0.65, 0])
        s_note = Text(
            "requests wait → batch together → all run same length · GPU idle gaps",
            font_size=13, color=RED,
        ).move_to([left_x + panel_w / 2, -1.6, 0])
        self.play(Write(s_title))
        self.wait(0.25)
        st_axis = taxis(left_x, top_y)
        self.play(Create(st_axis))
        gpu_l = Text("GPU", font_size=14, color=WHITE)
        gpu_l.move_to([left_x - 0.55, gpu_y, 0])
        self.play(Write(gpu_l))

        # Static GPU: idle [0-2], batch 1 [2-6], batch 2 [6-10], idle [10-12]
        s_gpu_idle1 = gbox(left_x, gpu_y, 0, 2, RED, 0.45)
        s_gpu_batch1 = gbox(left_x, gpu_y, 2, 4, GREEN)
        s_gpu_batch2 = gbox(left_x, gpu_y, 6, 4, GREEN)
        s_gpu_idle2 = gbox(left_x, gpu_y, 10, 2, RED, 0.45)
        s_gpu_bars = [s_gpu_idle1, s_gpu_batch1, s_gpu_batch2, s_gpu_idle2]
        for seg in s_gpu_bars:
            self.play(GrowFromEdge(seg, LEFT), run_time=0.09)

        # IDLE labels over the red idle strips
        s_idle1 = Text("IDLE", font_size=10, color=RED, weight=BOLD)
        s_idle1.move_to([left_x + 1 * cell, gpu_y, 0])
        s_idle2 = Text("IDLE", font_size=10, color=RED, weight=BOLD)
        s_idle2.move_to([left_x + 11 * cell, gpu_y, 0])
        self.play(Write(s_idle1), Write(s_idle2))

        # Same 5 requests as continuous — but scheduled in batches
        # each gets a gray WAIT bar before its coloured RUN bar
        # (name, color, wait_start, wait_end, run_start, run_end)
        s_req_defs = [
            ("A", ORANGE, 0, 2, 2, 6),   # arrives t=0, waits for C → batch 1
            ("B", BLUE, 1, 2, 2, 6),     # arrives t=1, waits for C
            ("C", PINK, 2, 2, 2, 6),     # arrives t=2, no wait (last to arrive)
            ("D", ORANGE, 4, 6, 6, 10),  # arrives t=4, waits for batch 1 to free GPU
            ("E", BLUE, 6, 6, 6, 10),    # arrives t=6, no wait (GPU just freed)
        ]
        s_bars = []
        for idx, (name, col, ws, we, rs, re) in enumerate(s_req_defs, start=1):
            y = gpu_y - idx * row
            lbl = Text(f"Req {name}", font_size=12, color=col)
            lbl.move_to([left_x - 0.55, y, 0])
            items = [lbl]
            anims = [FadeIn(lbl)]
            if we - ws > 0.02:  # skip the wait bar when there is no wait
                wbar = gbox(left_x, y, ws, we - ws, GRAY, 0.28)
                items.append(wbar)
                anims.append(GrowFromEdge(wbar, LEFT))
            rbar = gbox(left_x, y, rs, re - rs, col, 0.60)
            items.append(rbar)
            anims.append(GrowFromEdge(rbar, LEFT))
            s_bars.extend(items)
            self.play(*anims, run_time=0.09)

        # batch boxes — connect GPU busy segments to the requests they serve
        s_y_last3 = gpu_y - 3 * row  # Req C is the 3rd request row
        s_y_last5 = gpu_y - 5 * row  # Req E is the 5th request row
        b1_rect, b1_lbl = batch_box(left_x, gpu_y, s_y_last3, 2, 4, RED, "Batch 1")
        b2_rect, b2_lbl = batch_box(left_x, gpu_y, s_y_last5, 6, 4, RED, "Batch 2")
        self.play(Create(b1_rect), Write(b1_lbl))
        self.play(Create(b2_rect), Write(b2_lbl))
        self.wait(0.8)

        # ── Right: Continuous Batching ──
        c_title = Text("Continuous Batching", font_size=26, color=GREEN)
        c_title.move_to([right_x + panel_w / 2, top_y + 0.65, 0])
        c_note = Text(
            "no waiting · no padding · GPU never idle",
            font_size=13, color=GREEN,
        ).move_to([right_x + panel_w / 2, -1.6, 0])
        self.play(Write(c_title))
        self.wait(0.25)
        ct_axis = taxis(right_x, top_y)
        self.play(Create(ct_axis))
        cgpu_l = Text("GPU", font_size=14, color=WHITE)
        cgpu_l.move_to([right_x - 0.55, gpu_y, 0])
        self.play(Write(cgpu_l))

        # Continuous GPU: busy all 12 ticks (pipeline never drains)
        c_gpu = gbox(right_x, gpu_y, 0, 12, GREEN, 0.75)
        self.play(GrowFromEdge(c_gpu, LEFT), run_time=0.5)

        # Same 5 requests — start immediately, no wait, staggered naturally
        # (name, color, start, span)
        c_reqs = [
            ("A", ORANGE, 0, 4),
            ("B", BLUE, 1, 4),
            ("C", PINK, 2, 4),
            ("D", ORANGE, 4, 4),
            ("E", BLUE, 6, 4),
        ]
        c_bars = []
        for idx, (name, col, start, span) in enumerate(c_reqs, start=1):
            y = gpu_y - idx * row
            lbl = Text(f"Req {name}", font_size=12, color=col)
            lbl.move_to([right_x - 0.55, y, 0])
            bar_rect = gbox(right_x, y, start, span, col, 0.60)
            c_bars.extend([lbl, bar_rect])
            self.play(FadeIn(lbl), GrowFromEdge(bar_rect, LEFT), run_time=0.09)
        self.wait(0.3)

        # continuous box — GPU always serving
        c_y_last = gpu_y - len(c_reqs) * row
        c_box_rect, c_box_lbl = batch_box(
            right_x, gpu_y, c_y_last, 0, 12, GREEN, "Always Serving"
        )
        self.play(Create(c_box_rect), Write(c_box_lbl))
        self.wait(1.0)

        # count annotation
        s_count = Text(
            "5 reqs · 2 batches · GPU idle gaps",
            font_size=16, color=RED, weight=BOLD,
        ).next_to(s_gpu_batch1, DOWN, buff=1.0).align_to(s_gpu_batch1, LEFT)
        c_count = Text(
            "5 reqs · continuous · GPU never idle",
            font_size=16, color=GREEN, weight=BOLD,
        ).next_to(c_gpu, DOWN, buff=1.0).align_to(c_gpu, LEFT)
        self.play(Write(s_note), Write(c_note))
        self.wait(0.3)
        self.play(Write(s_count), Write(c_count))
        self.wait(2.5)
        self.play(FadeOut(s_count), FadeOut(c_count))

        # ── Fade out gantt ──
        gantt_mobs = [
            *header, s_title, s_note, c_title, c_note,
            gpu_l, cgpu_l, s_idle1, s_idle2, st_axis, ct_axis,
            *s_gpu_bars, c_gpu, *s_bars, *c_bars,
            b1_rect, b1_lbl, b2_rect, b2_lbl, c_box_rect, c_box_lbl,
        ]
        self.play(*[FadeOut(m) for m in gantt_mobs])
        self.wait(0.2)

    # ═══════════════════════════════════════════════════
    # 10. Throughput comparison with animated bars
    # ═══════════════════════════════════════════════════
    def _show_throughput_comparison(self):
        """Grow two horizontal bars (1.0x vs 3.4x), then fade the scene out."""
        # ---- title ----
        compare_title = Text("Throughput Comparison", font_size=30, color=BLUE)
        self.play(Write(compare_title))
        self.wait(0.2)
        self.play(compare_title.animate.to_edge(UP).scale(0.55))
        self.wait(0.2)

        # ---- bar config ----
        bar_max_w = 5.0
        bar_h = 0.55
        row_gap = 0.8
        ratio = 1.0 / 3.4  # static bar length relative to the continuous bar

        # ---- Static Batching row ----
        s_label = Text("Static Batching", font_size=24, color=RED)
        s_rect = Rectangle(width=bar_max_w, height=bar_h, color=RED, stroke_width=1.5)
        s_bar_rect = Rectangle(
            width=bar_max_w * ratio, height=bar_h,
            color=RED, fill_opacity=0.55, stroke_width=0,
        )
        s_num = Text("1.0x", font_size=24, color=RED)

        # ---- Continuous Batching row ----
        c_label = Text("Continuous Batching", font_size=24, color=GREEN)
        c_rect = Rectangle(width=bar_max_w, height=bar_h, color=GREEN, stroke_width=1.5)
        c_bar_rect = Rectangle(
            width=bar_max_w, height=bar_h,
            color=GREEN, fill_opacity=0.55, stroke_width=0,
        )
        c_num = Text("3.4x", font_size=24, color=GREEN)

        # position the outline rects first, then align the filled bars to them
        row_offset = row_gap / 2 + bar_h / 2
        s_rect.move_to(ORIGIN + UP * row_offset)
        c_rect.move_to(ORIGIN + DOWN * row_offset)
        s_bar_rect.align_to(s_rect, LEFT).align_to(s_rect, UP)
        c_bar_rect.align_to(c_rect, LEFT).align_to(c_rect, UP)

        # labels left, nums right
        s_label.next_to(s_rect, LEFT, buff=0.4)
        c_label.next_to(c_rect, LEFT, buff=0.4)
        s_num.next_to(s_rect, RIGHT, buff=0.4)
        c_num.next_to(c_rect, RIGHT, buff=0.4)

        self.play(
            Create(s_rect), Create(c_rect),
            Write(s_label), Write(c_label),
        )
        self.wait(0.3)

        # grow bars
        self.play(GrowFromEdge(s_bar_rect, LEFT), rate_func=linear, run_time=0.6)
        self.wait(0.3)
        self.play(GrowFromEdge(c_bar_rect, LEFT), rate_func=linear, run_time=0.6)
        self.wait(0.3)

        # show values
        self.play(Write(s_num), Write(c_num))
        self.wait(2.5)
        self.play(*[FadeOut(m) for m in self.mobjects])