fix: new requests enter at Prefill stage, not Refill
This commit is contained in:
parent
fc68fc9107
commit
4ed16a70b4
|
|
@ -82,28 +82,24 @@ class ContinuousBatching(Scene):
|
|||
"A": make_token("A", BATCH_COLORS[0]),
|
||||
"B": make_token("B", BATCH_COLORS[1]),
|
||||
"C": make_token("C", BATCH_COLORS[2]),
|
||||
"D": make_token("D", BATCH_COLORS[3]),
|
||||
}
|
||||
# assign to states
|
||||
tokens["A"].move_to(states[2]).shift(RIGHT * 1.5) # Prefill
|
||||
tokens["B"].move_to(states[3]).shift(RIGHT * 1.5) # Decode
|
||||
tokens["C"].move_to(states[1]).shift(RIGHT * 1.5) # Refill
|
||||
tokens["D"].move_to(states[0]).shift(RIGHT * 1.5) # Cleanup
|
||||
# all three at consecutive stages, Prefill is the entry point
|
||||
tokens["A"].move_to(states[2]).shift(RIGHT * 1.5) # Prefill
|
||||
tokens["B"].move_to(states[3]).shift(RIGHT * 1.5) # Decode
|
||||
tokens["C"].move_to(states[0]).shift(RIGHT * 1.5) # Cleanup
|
||||
|
||||
for t in tokens.values():
|
||||
self.play(FadeIn(t, scale=0.7), run_time=0.25)
|
||||
self.wait(0.2)
|
||||
|
||||
note = Text("4 batches distributed across 4 states", font_size=16, color=WHITE) \
|
||||
note = Text("Every request starts at Prefill", font_size=16, color=WHITE) \
|
||||
.next_to(states, DOWN, buff=0.55)
|
||||
self.play(Write(note))
|
||||
self.wait(1.0)
|
||||
self.play(FadeOut(note))
|
||||
|
||||
# ═══════════════════════════════════════════════════
|
||||
# 3. Tick 1 — all tokens advance one state
|
||||
# A: Prefill → Decode B: Decode → Cleanup
|
||||
# C: Refill → Prefill D: Cleanup → Refill
|
||||
# 3. Tick 1 — advance, C exits, new D enters at Prefill
|
||||
# ═══════════════════════════════════════════════════
|
||||
slots = [
|
||||
states[0].get_center() + RIGHT * 1.5, # Cleanup
|
||||
|
|
@ -113,58 +109,70 @@ class ContinuousBatching(Scene):
|
|||
]
|
||||
|
||||
self.play(
|
||||
tokens["A"].animate.move_to(slots[3]), # → Decode
|
||||
tokens["B"].animate.move_to(slots[0]), # → Cleanup
|
||||
tokens["C"].animate.move_to(slots[2]), # → Prefill
|
||||
tokens["D"].animate.move_to(slots[1]), # → Refill
|
||||
tokens["A"].animate.move_to(slots[3]), # Prefill → Decode
|
||||
tokens["B"].animate.move_to(slots[0]), # Decode → Cleanup
|
||||
tokens["C"].animate.move_to(slots[1]), # Cleanup → Refill
|
||||
)
|
||||
self.wait(0.3)
|
||||
|
||||
# C (now at Refill) exits after completing the loop
|
||||
# new D enters at Prefill
|
||||
self.play(FadeOut(tokens["C"], scale=0.6))
|
||||
tokens["D"] = make_token("D", BATCH_COLORS[3])
|
||||
tokens["D"].move_to(states[2]).shift(RIGHT * 1.5) # Prefill ← entry
|
||||
self.play(FadeIn(tokens["D"], scale=0.7))
|
||||
self.wait(0.25)
|
||||
|
||||
# ═══════════════════════════════════════════════════
|
||||
# 4. Tick 2 — advance, B exits, new E enters at Prefill
|
||||
# ═══════════════════════════════════════════════════
|
||||
self.play(
|
||||
tokens["D"].animate.move_to(slots[3]), # Prefill → Decode
|
||||
tokens["A"].animate.move_to(slots[0]), # Decode → Cleanup
|
||||
tokens["B"].animate.move_to(slots[1]), # Cleanup → Refill
|
||||
)
|
||||
self.wait(0.3)
|
||||
|
||||
# B finished → replace with new token E
|
||||
self.play(FadeOut(tokens["B"], scale=0.6))
|
||||
tokens["E"] = make_token("E", BATCH_COLORS[4])
|
||||
tokens["E"].move_to(states[1]).shift(RIGHT * 1.5) # Refill
|
||||
tokens["E"].move_to(states[2]).shift(RIGHT * 1.5) # Prefill ← entry
|
||||
self.play(FadeIn(tokens["E"], scale=0.7))
|
||||
self.wait(0.25)
|
||||
|
||||
# ═══════════════════════════════════════════════════
|
||||
# 4. Tick 2 — advance again
|
||||
# 5. Tick 3 — advance, A exits, new F enters at Prefill
|
||||
# ═══════════════════════════════════════════════════
|
||||
self.play(
|
||||
tokens["A"].animate.move_to(slots[0]), # Decode → Cleanup
|
||||
tokens["D"].animate.move_to(slots[2]), # Refill → Prefill
|
||||
tokens["C"].animate.move_to(slots[3]), # Prefill → Decode
|
||||
tokens["E"].animate.move_to(slots[1]), # (entered) → keeps Refill
|
||||
tokens["E"].animate.move_to(slots[3]), # Prefill → Decode
|
||||
tokens["D"].animate.move_to(slots[0]), # Decode → Cleanup
|
||||
tokens["A"].animate.move_to(slots[1]), # Cleanup → Refill
|
||||
)
|
||||
self.wait(0.3)
|
||||
self.wait(0.25)
|
||||
|
||||
# A finished → replace with F
|
||||
self.play(FadeOut(tokens["A"], scale=0.6))
|
||||
tokens["F"] = make_token("F", BATCH_COLORS[5])
|
||||
tokens["F"].move_to(states[1]).shift(RIGHT * 1.5)
|
||||
tokens["F"].move_to(states[2]).shift(RIGHT * 1.5) # Prefill ← entry
|
||||
self.play(FadeIn(tokens["F"], scale=0.7))
|
||||
self.wait(0.25)
|
||||
|
||||
# ═══════════════════════════════════════════════════
|
||||
# 5. Tick 3 — faster cycle, show pipeline never drains
|
||||
# 6. Tick 4 — advance, D exits, new G enters at Prefill
|
||||
# ═══════════════════════════════════════════════════
|
||||
self.play(
|
||||
tokens["C"].animate.move_to(slots[0]), # Decode → Cleanup
|
||||
tokens["D"].animate.move_to(slots[3]), # Prefill → Decode
|
||||
tokens["E"].animate.move_to(slots[2]), # Refill → Prefill
|
||||
tokens["F"].animate.move_to(slots[1]), # → Refill
|
||||
tokens["F"].animate.move_to(slots[3]), # Prefill → Decode
|
||||
tokens["E"].animate.move_to(slots[0]), # Decode → Cleanup
|
||||
tokens["D"].animate.move_to(slots[1]), # Cleanup → Refill
|
||||
)
|
||||
self.wait(0.25)
|
||||
|
||||
# C done → G enters
|
||||
self.play(FadeOut(tokens["C"], scale=0.6))
|
||||
self.play(FadeOut(tokens["D"], scale=0.6))
|
||||
tokens["G"] = make_token("G", BATCH_COLORS[6])
|
||||
tokens["G"].move_to(states[1]).shift(RIGHT * 1.5)
|
||||
tokens["G"].move_to(states[2]).shift(RIGHT * 1.5) # Prefill ← entry
|
||||
self.play(FadeIn(tokens["G"], scale=0.7))
|
||||
self.wait(0.35)
|
||||
|
||||
# drop note: constant throughput
|
||||
flow_note = Text("Pipeline never drains — constant throughput",
|
||||
# drop note: constant throughput, all enter at Prefill
|
||||
flow_note = Text("All requests enter at Prefill — pipeline never drains",
|
||||
font_size=15, color=GREEN).next_to(states, DOWN, buff=0.55)
|
||||
self.play(Write(flow_note))
|
||||
self.wait(1.5)
|
||||
|
|
|
|||
Loading…
Reference in New Issue