fix: new requests enter at Prefill stage, not Refill
This commit is contained in:
parent
fc68fc9107
commit
4ed16a70b4
|
|
@ -82,28 +82,24 @@ class ContinuousBatching(Scene):
|
||||||
"A": make_token("A", BATCH_COLORS[0]),
|
"A": make_token("A", BATCH_COLORS[0]),
|
||||||
"B": make_token("B", BATCH_COLORS[1]),
|
"B": make_token("B", BATCH_COLORS[1]),
|
||||||
"C": make_token("C", BATCH_COLORS[2]),
|
"C": make_token("C", BATCH_COLORS[2]),
|
||||||
"D": make_token("D", BATCH_COLORS[3]),
|
|
||||||
}
|
}
|
||||||
# assign to states
|
# all three at consecutive stages, Prefill is the entry point
|
||||||
tokens["A"].move_to(states[2]).shift(RIGHT * 1.5) # Prefill
|
tokens["A"].move_to(states[2]).shift(RIGHT * 1.5) # Prefill
|
||||||
tokens["B"].move_to(states[3]).shift(RIGHT * 1.5) # Decode
|
tokens["B"].move_to(states[3]).shift(RIGHT * 1.5) # Decode
|
||||||
tokens["C"].move_to(states[1]).shift(RIGHT * 1.5) # Refill
|
tokens["C"].move_to(states[0]).shift(RIGHT * 1.5) # Cleanup
|
||||||
tokens["D"].move_to(states[0]).shift(RIGHT * 1.5) # Cleanup
|
|
||||||
|
|
||||||
for t in tokens.values():
|
for t in tokens.values():
|
||||||
self.play(FadeIn(t, scale=0.7), run_time=0.25)
|
self.play(FadeIn(t, scale=0.7), run_time=0.25)
|
||||||
self.wait(0.2)
|
self.wait(0.2)
|
||||||
|
|
||||||
note = Text("4 batches distributed across 4 states", font_size=16, color=WHITE) \
|
note = Text("Every request starts at Prefill", font_size=16, color=WHITE) \
|
||||||
.next_to(states, DOWN, buff=0.55)
|
.next_to(states, DOWN, buff=0.55)
|
||||||
self.play(Write(note))
|
self.play(Write(note))
|
||||||
self.wait(1.0)
|
self.wait(1.0)
|
||||||
self.play(FadeOut(note))
|
self.play(FadeOut(note))
|
||||||
|
|
||||||
# ═══════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════
|
||||||
# 3. Tick 1 — all tokens advance one state
|
# 3. Tick 1 — advance, C exits, new D enters at Prefill
|
||||||
# A: Prefill → Decode B: Decode → Cleanup
|
|
||||||
# C: Refill → Prefill D: Cleanup → Refill
|
|
||||||
# ═══════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════
|
||||||
slots = [
|
slots = [
|
||||||
states[0].get_center() + RIGHT * 1.5, # Cleanup
|
states[0].get_center() + RIGHT * 1.5, # Cleanup
|
||||||
|
|
@ -113,58 +109,70 @@ class ContinuousBatching(Scene):
|
||||||
]
|
]
|
||||||
|
|
||||||
self.play(
|
self.play(
|
||||||
tokens["A"].animate.move_to(slots[3]), # → Decode
|
tokens["A"].animate.move_to(slots[3]), # Prefill → Decode
|
||||||
tokens["B"].animate.move_to(slots[0]), # → Cleanup
|
tokens["B"].animate.move_to(slots[0]), # Decode → Cleanup
|
||||||
tokens["C"].animate.move_to(slots[2]), # → Prefill
|
tokens["C"].animate.move_to(slots[1]), # Cleanup → Refill
|
||||||
tokens["D"].animate.move_to(slots[1]), # → Refill
|
)
|
||||||
|
self.wait(0.3)
|
||||||
|
|
||||||
|
# C (now at Refill) exits after completing the loop
|
||||||
|
# new D enters at Prefill
|
||||||
|
self.play(FadeOut(tokens["C"], scale=0.6))
|
||||||
|
tokens["D"] = make_token("D", BATCH_COLORS[3])
|
||||||
|
tokens["D"].move_to(states[2]).shift(RIGHT * 1.5) # Prefill ← entry
|
||||||
|
self.play(FadeIn(tokens["D"], scale=0.7))
|
||||||
|
self.wait(0.25)
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════
|
||||||
|
# 4. Tick 2 — advance, B exits, new E enters at Prefill
|
||||||
|
# ═══════════════════════════════════════════════════
|
||||||
|
self.play(
|
||||||
|
tokens["D"].animate.move_to(slots[3]), # Prefill → Decode
|
||||||
|
tokens["A"].animate.move_to(slots[0]), # Decode → Cleanup
|
||||||
|
tokens["B"].animate.move_to(slots[1]), # Cleanup → Refill
|
||||||
)
|
)
|
||||||
self.wait(0.3)
|
self.wait(0.3)
|
||||||
|
|
||||||
# B finished → replace with new token E
|
|
||||||
self.play(FadeOut(tokens["B"], scale=0.6))
|
self.play(FadeOut(tokens["B"], scale=0.6))
|
||||||
tokens["E"] = make_token("E", BATCH_COLORS[4])
|
tokens["E"] = make_token("E", BATCH_COLORS[4])
|
||||||
tokens["E"].move_to(states[1]).shift(RIGHT * 1.5) # Refill
|
tokens["E"].move_to(states[2]).shift(RIGHT * 1.5) # Prefill ← entry
|
||||||
self.play(FadeIn(tokens["E"], scale=0.7))
|
self.play(FadeIn(tokens["E"], scale=0.7))
|
||||||
self.wait(0.25)
|
self.wait(0.25)
|
||||||
|
|
||||||
# ═══════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════
|
||||||
# 4. Tick 2 — advance again
|
# 5. Tick 3 — advance, A exits, new F enters at Prefill
|
||||||
# ═══════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════
|
||||||
self.play(
|
self.play(
|
||||||
tokens["A"].animate.move_to(slots[0]), # Decode → Cleanup
|
tokens["E"].animate.move_to(slots[3]), # Prefill → Decode
|
||||||
tokens["D"].animate.move_to(slots[2]), # Refill → Prefill
|
tokens["D"].animate.move_to(slots[0]), # Decode → Cleanup
|
||||||
tokens["C"].animate.move_to(slots[3]), # Prefill → Decode
|
tokens["A"].animate.move_to(slots[1]), # Cleanup → Refill
|
||||||
tokens["E"].animate.move_to(slots[1]), # (entered) → keeps Refill
|
|
||||||
)
|
)
|
||||||
self.wait(0.3)
|
self.wait(0.25)
|
||||||
|
|
||||||
# A finished → replace with F
|
|
||||||
self.play(FadeOut(tokens["A"], scale=0.6))
|
self.play(FadeOut(tokens["A"], scale=0.6))
|
||||||
tokens["F"] = make_token("F", BATCH_COLORS[5])
|
tokens["F"] = make_token("F", BATCH_COLORS[5])
|
||||||
tokens["F"].move_to(states[1]).shift(RIGHT * 1.5)
|
tokens["F"].move_to(states[2]).shift(RIGHT * 1.5) # Prefill ← entry
|
||||||
self.play(FadeIn(tokens["F"], scale=0.7))
|
self.play(FadeIn(tokens["F"], scale=0.7))
|
||||||
self.wait(0.25)
|
self.wait(0.25)
|
||||||
|
|
||||||
# ═══════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════
|
||||||
# 5. Tick 3 — faster cycle, show pipeline never drains
|
# 6. Tick 4 — advance, D exits, new G enters at Prefill
|
||||||
# ═══════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════
|
||||||
self.play(
|
self.play(
|
||||||
tokens["C"].animate.move_to(slots[0]), # Decode → Cleanup
|
tokens["F"].animate.move_to(slots[3]), # Prefill → Decode
|
||||||
tokens["D"].animate.move_to(slots[3]), # Prefill → Decode
|
tokens["E"].animate.move_to(slots[0]), # Decode → Cleanup
|
||||||
tokens["E"].animate.move_to(slots[2]), # Refill → Prefill
|
tokens["D"].animate.move_to(slots[1]), # Cleanup → Refill
|
||||||
tokens["F"].animate.move_to(slots[1]), # → Refill
|
|
||||||
)
|
)
|
||||||
self.wait(0.25)
|
self.wait(0.25)
|
||||||
|
|
||||||
# C done → G enters
|
self.play(FadeOut(tokens["D"], scale=0.6))
|
||||||
self.play(FadeOut(tokens["C"], scale=0.6))
|
|
||||||
tokens["G"] = make_token("G", BATCH_COLORS[6])
|
tokens["G"] = make_token("G", BATCH_COLORS[6])
|
||||||
tokens["G"].move_to(states[1]).shift(RIGHT * 1.5)
|
tokens["G"].move_to(states[2]).shift(RIGHT * 1.5) # Prefill ← entry
|
||||||
self.play(FadeIn(tokens["G"], scale=0.7))
|
self.play(FadeIn(tokens["G"], scale=0.7))
|
||||||
self.wait(0.35)
|
self.wait(0.35)
|
||||||
|
|
||||||
# drop note: constant throughput
|
# drop note: constant throughput, all enter at Prefill
|
||||||
flow_note = Text("Pipeline never drains — constant throughput",
|
flow_note = Text("All requests enter at Prefill — pipeline never drains",
|
||||||
font_size=15, color=GREEN).next_to(states, DOWN, buff=0.55)
|
font_size=15, color=GREEN).next_to(states, DOWN, buff=0.55)
|
||||||
self.play(Write(flow_note))
|
self.play(Write(flow_note))
|
||||||
self.wait(1.5)
|
self.wait(1.5)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue