diff --git a/continuous_batching.py b/continuous_batching.py index c7335fb..71694e9 100644 --- a/continuous_batching.py +++ b/continuous_batching.py @@ -82,28 +82,24 @@ class ContinuousBatching(Scene): "A": make_token("A", BATCH_COLORS[0]), "B": make_token("B", BATCH_COLORS[1]), "C": make_token("C", BATCH_COLORS[2]), - "D": make_token("D", BATCH_COLORS[3]), } - # assign to states - tokens["A"].move_to(states[2]).shift(RIGHT * 1.5) # Prefill - tokens["B"].move_to(states[3]).shift(RIGHT * 1.5) # Decode - tokens["C"].move_to(states[1]).shift(RIGHT * 1.5) # Refill - tokens["D"].move_to(states[0]).shift(RIGHT * 1.5) # Cleanup + # all three at consecutive stages, Prefill is the entry point + tokens["A"].move_to(states[2]).shift(RIGHT * 1.5) # Prefill + tokens["B"].move_to(states[3]).shift(RIGHT * 1.5) # Decode + tokens["C"].move_to(states[0]).shift(RIGHT * 1.5) # Cleanup for t in tokens.values(): self.play(FadeIn(t, scale=0.7), run_time=0.25) self.wait(0.2) - note = Text("4 batches distributed across 4 states", font_size=16, color=WHITE) \ + note = Text("Every request starts at Prefill", font_size=16, color=WHITE) \ .next_to(states, DOWN, buff=0.55) self.play(Write(note)) self.wait(1.0) self.play(FadeOut(note)) # ═══════════════════════════════════════════════════ - # 3. Tick 1 — all tokens advance one state - # A: Prefill → Decode B: Decode → Cleanup - # C: Refill → Prefill D: Cleanup → Refill + # 3. 
Tick 1 — advance, C exits, new D enters at Prefill # ═══════════════════════════════════════════════════ slots = [ states[0].get_center() + RIGHT * 1.5, # Cleanup @@ -113,58 +109,70 @@ class ContinuousBatching(Scene): ] self.play( - tokens["A"].animate.move_to(slots[3]), # → Decode - tokens["B"].animate.move_to(slots[0]), # → Cleanup - tokens["C"].animate.move_to(slots[2]), # → Prefill - tokens["D"].animate.move_to(slots[1]), # → Refill + tokens["A"].animate.move_to(slots[3]), # Prefill → Decode + tokens["B"].animate.move_to(slots[0]), # Decode → Cleanup + tokens["C"].animate.move_to(slots[1]), # Cleanup → Refill + ) + self.wait(0.3) + + # C (now at Refill) exits after completing the loop + # new D enters at Prefill + self.play(FadeOut(tokens["C"], scale=0.6)) + tokens["D"] = make_token("D", BATCH_COLORS[3]) + tokens["D"].move_to(states[2]).shift(RIGHT * 1.5) # Prefill ← entry + self.play(FadeIn(tokens["D"], scale=0.7)) + self.wait(0.25) + + # ═══════════════════════════════════════════════════ + # 4. Tick 2 — advance, B exits, new E enters at Prefill + # ═══════════════════════════════════════════════════ + self.play( + tokens["D"].animate.move_to(slots[3]), # Prefill → Decode + tokens["A"].animate.move_to(slots[0]), # Decode → Cleanup + tokens["B"].animate.move_to(slots[1]), # Cleanup → Refill ) self.wait(0.3) - # B finished → replace with new token E self.play(FadeOut(tokens["B"], scale=0.6)) tokens["E"] = make_token("E", BATCH_COLORS[4]) - tokens["E"].move_to(states[1]).shift(RIGHT * 1.5) # Refill + tokens["E"].move_to(states[2]).shift(RIGHT * 1.5) # Prefill ← entry self.play(FadeIn(tokens["E"], scale=0.7)) self.wait(0.25) # ═══════════════════════════════════════════════════ - # 4. Tick 2 — advance again + # 5. 
Tick 3 — advance, A exits, new F enters at Prefill # ═══════════════════════════════════════════════════ self.play( - tokens["A"].animate.move_to(slots[0]), # Decode → Cleanup - tokens["D"].animate.move_to(slots[2]), # Refill → Prefill - tokens["C"].animate.move_to(slots[3]), # Prefill → Decode - tokens["E"].animate.move_to(slots[1]), # (entered) → keeps Refill + tokens["E"].animate.move_to(slots[3]), # Prefill → Decode + tokens["D"].animate.move_to(slots[0]), # Decode → Cleanup + tokens["A"].animate.move_to(slots[1]), # Cleanup → Refill ) - self.wait(0.3) + self.wait(0.25) - # A finished → replace with F self.play(FadeOut(tokens["A"], scale=0.6)) tokens["F"] = make_token("F", BATCH_COLORS[5]) - tokens["F"].move_to(states[1]).shift(RIGHT * 1.5) + tokens["F"].move_to(states[2]).shift(RIGHT * 1.5) # Prefill ← entry self.play(FadeIn(tokens["F"], scale=0.7)) self.wait(0.25) # ═══════════════════════════════════════════════════ - # 5. Tick 3 — faster cycle, show pipeline never drains + # 6. Tick 4 — advance, D exits, new G enters at Prefill # ═══════════════════════════════════════════════════ self.play( - tokens["C"].animate.move_to(slots[0]), # Decode → Cleanup - tokens["D"].animate.move_to(slots[3]), # Prefill → Decode - tokens["E"].animate.move_to(slots[2]), # Refill → Prefill - tokens["F"].animate.move_to(slots[1]), # → Refill + tokens["F"].animate.move_to(slots[3]), # Prefill → Decode + tokens["E"].animate.move_to(slots[0]), # Decode → Cleanup + tokens["D"].animate.move_to(slots[1]), # Cleanup → Refill ) self.wait(0.25) - # C done → G enters - self.play(FadeOut(tokens["C"], scale=0.6)) + self.play(FadeOut(tokens["D"], scale=0.6)) tokens["G"] = make_token("G", BATCH_COLORS[6]) - tokens["G"].move_to(states[1]).shift(RIGHT * 1.5) + tokens["G"].move_to(states[2]).shift(RIGHT * 1.5) # Prefill ← entry self.play(FadeIn(tokens["G"], scale=0.7)) self.wait(0.35) - # drop note: constant throughput - flow_note = Text("Pipeline never drains — constant throughput", + # drop 
note: constant throughput, all enter at Prefill + flow_note = Text("All requests enter at Prefill — pipeline never drains", font_size=15, color=GREEN).next_to(states, DOWN, buff=0.55) self.play(Write(flow_note)) self.wait(1.5)