fix: new requests enter at Prefill stage, not Refill

This commit is contained in:
ViperEkura 2026-05-06 21:59:47 +08:00
parent fc68fc9107
commit 4ed16a70b4
1 changed file with 42 additions and 34 deletions

View File

@ -82,28 +82,24 @@ class ContinuousBatching(Scene):
"A": make_token("A", BATCH_COLORS[0]), "A": make_token("A", BATCH_COLORS[0]),
"B": make_token("B", BATCH_COLORS[1]), "B": make_token("B", BATCH_COLORS[1]),
"C": make_token("C", BATCH_COLORS[2]), "C": make_token("C", BATCH_COLORS[2]),
"D": make_token("D", BATCH_COLORS[3]),
} }
# assign to states # all three at consecutive stages, Prefill is the entry point
tokens["A"].move_to(states[2]).shift(RIGHT * 1.5) # Prefill tokens["A"].move_to(states[2]).shift(RIGHT * 1.5) # Prefill
tokens["B"].move_to(states[3]).shift(RIGHT * 1.5) # Decode tokens["B"].move_to(states[3]).shift(RIGHT * 1.5) # Decode
tokens["C"].move_to(states[1]).shift(RIGHT * 1.5) # Refill tokens["C"].move_to(states[0]).shift(RIGHT * 1.5) # Cleanup
tokens["D"].move_to(states[0]).shift(RIGHT * 1.5) # Cleanup
for t in tokens.values(): for t in tokens.values():
self.play(FadeIn(t, scale=0.7), run_time=0.25) self.play(FadeIn(t, scale=0.7), run_time=0.25)
self.wait(0.2) self.wait(0.2)
note = Text("4 batches distributed across 4 states", font_size=16, color=WHITE) \ note = Text("Every request starts at Prefill", font_size=16, color=WHITE) \
.next_to(states, DOWN, buff=0.55) .next_to(states, DOWN, buff=0.55)
self.play(Write(note)) self.play(Write(note))
self.wait(1.0) self.wait(1.0)
self.play(FadeOut(note)) self.play(FadeOut(note))
# ═══════════════════════════════════════════════════ # ═══════════════════════════════════════════════════
# 3. Tick 1 — all tokens advance one state # 3. Tick 1 — advance, C exits, new D enters at Prefill
# A: Prefill → Decode B: Decode → Cleanup
# C: Refill → Prefill D: Cleanup → Refill
# ═══════════════════════════════════════════════════ # ═══════════════════════════════════════════════════
slots = [ slots = [
states[0].get_center() + RIGHT * 1.5, # Cleanup states[0].get_center() + RIGHT * 1.5, # Cleanup
@ -113,58 +109,70 @@ class ContinuousBatching(Scene):
] ]
self.play( self.play(
tokens["A"].animate.move_to(slots[3]), # → Decode tokens["A"].animate.move_to(slots[3]), # Prefill → Decode
tokens["B"].animate.move_to(slots[0]), # → Cleanup tokens["B"].animate.move_to(slots[0]), # Decode → Cleanup
tokens["C"].animate.move_to(slots[2]), # → Prefill tokens["C"].animate.move_to(slots[1]), # Cleanup → Refill
tokens["D"].animate.move_to(slots[1]), # → Refill )
self.wait(0.3)
# C (now at Refill) exits after completing the loop
# new D enters at Prefill
self.play(FadeOut(tokens["C"], scale=0.6))
tokens["D"] = make_token("D", BATCH_COLORS[3])
tokens["D"].move_to(states[2]).shift(RIGHT * 1.5) # Prefill ← entry
self.play(FadeIn(tokens["D"], scale=0.7))
self.wait(0.25)
# ═══════════════════════════════════════════════════
# 4. Tick 2 — advance, B exits, new E enters at Prefill
# ═══════════════════════════════════════════════════
self.play(
tokens["D"].animate.move_to(slots[3]), # Prefill → Decode
tokens["A"].animate.move_to(slots[0]), # Decode → Cleanup
tokens["B"].animate.move_to(slots[1]), # Cleanup → Refill
) )
self.wait(0.3) self.wait(0.3)
# B finished → replace with new token E
self.play(FadeOut(tokens["B"], scale=0.6)) self.play(FadeOut(tokens["B"], scale=0.6))
tokens["E"] = make_token("E", BATCH_COLORS[4]) tokens["E"] = make_token("E", BATCH_COLORS[4])
tokens["E"].move_to(states[1]).shift(RIGHT * 1.5) # Refill tokens["E"].move_to(states[2]).shift(RIGHT * 1.5) # Prefill ← entry
self.play(FadeIn(tokens["E"], scale=0.7)) self.play(FadeIn(tokens["E"], scale=0.7))
self.wait(0.25) self.wait(0.25)
# ═══════════════════════════════════════════════════ # ═══════════════════════════════════════════════════
# 4. Tick 2 — advance again # 5. Tick 3 — advance, A exits, new F enters at Prefill
# ═══════════════════════════════════════════════════ # ═══════════════════════════════════════════════════
self.play( self.play(
tokens["A"].animate.move_to(slots[0]), # Decode → Cleanup tokens["E"].animate.move_to(slots[3]), # Prefill → Decode
tokens["D"].animate.move_to(slots[2]), # Refill → Prefill tokens["D"].animate.move_to(slots[0]), # Decode → Cleanup
tokens["C"].animate.move_to(slots[3]), # Prefill → Decode tokens["A"].animate.move_to(slots[1]), # Cleanup → Refill
tokens["E"].animate.move_to(slots[1]), # (entered) → keeps Refill
) )
self.wait(0.3) self.wait(0.25)
# A finished → replace with F
self.play(FadeOut(tokens["A"], scale=0.6)) self.play(FadeOut(tokens["A"], scale=0.6))
tokens["F"] = make_token("F", BATCH_COLORS[5]) tokens["F"] = make_token("F", BATCH_COLORS[5])
tokens["F"].move_to(states[1]).shift(RIGHT * 1.5) tokens["F"].move_to(states[2]).shift(RIGHT * 1.5) # Prefill ← entry
self.play(FadeIn(tokens["F"], scale=0.7)) self.play(FadeIn(tokens["F"], scale=0.7))
self.wait(0.25) self.wait(0.25)
# ═══════════════════════════════════════════════════ # ═══════════════════════════════════════════════════
# 5. Tick 3 — faster cycle, show pipeline never drains # 6. Tick 4 — advance, D exits, new G enters at Prefill
# ═══════════════════════════════════════════════════ # ═══════════════════════════════════════════════════
self.play( self.play(
tokens["C"].animate.move_to(slots[0]), # Decode → Cleanup tokens["F"].animate.move_to(slots[3]), # Prefill → Decode
tokens["D"].animate.move_to(slots[3]), # Prefill → Decode tokens["E"].animate.move_to(slots[0]), # Decode → Cleanup
tokens["E"].animate.move_to(slots[2]), # Refill → Prefill tokens["D"].animate.move_to(slots[1]), # Cleanup → Refill
tokens["F"].animate.move_to(slots[1]), # → Refill
) )
self.wait(0.25) self.wait(0.25)
# C done → G enters self.play(FadeOut(tokens["D"], scale=0.6))
self.play(FadeOut(tokens["C"], scale=0.6))
tokens["G"] = make_token("G", BATCH_COLORS[6]) tokens["G"] = make_token("G", BATCH_COLORS[6])
tokens["G"].move_to(states[1]).shift(RIGHT * 1.5) tokens["G"].move_to(states[2]).shift(RIGHT * 1.5) # Prefill ← entry
self.play(FadeIn(tokens["G"], scale=0.7)) self.play(FadeIn(tokens["G"], scale=0.7))
self.wait(0.35) self.wait(0.35)
# drop note: constant throughput # drop note: constant throughput, all enter at Prefill
flow_note = Text("Pipeline never drains — constant throughput", flow_note = Text("All requests enter at Prefill — pipeline never drains",
font_size=15, color=GREEN).next_to(states, DOWN, buff=0.55) font_size=15, color=GREEN).next_to(states, DOWN, buff=0.55)
self.play(Write(flow_note)) self.play(Write(flow_note))
self.wait(1.5) self.wait(1.5)