"""AstrAI promo: Full architecture overview — layer by layer introduction."""
from manim import *

# Shared geometry of the layer stack, in scene units.
LAYER_WIDTH = 8.0
LAYER_HEIGHT = 1.05
LAYER_GAP = 0.12

# One entry per layer, top to bottom:
# (header text, accent color, source-path line, detail lines).
LAYER_SPECS = [
    (
        "HTTP API Server",
        GREEN,
        "astrai/inference/server.py",
        [
            "FastAPI • OpenAI-Compatible /v1/chat/completions",
            "Streaming SSE • Async • Health/Stats Endpoints",
        ],
    ),
    (
        "InferenceEngine",
        BLUE,
        "astrai/inference/engine.py",
        [
            "generate() · generate_async() · generate_with_request()",
            "Batch mode · Streaming (Generator) · Thread-safe accumulator",
        ],
    ),
    (
        "InferenceScheduler (Background Daemon)",
        YELLOW,
        "astrai/inference/scheduler.py",
        [
            "Cleanup → Refill → Prefill → Decode · 4-phase loop",
            "Position-Grouped Decode · Bitmask O(1) Slot Allocation",
        ],
    ),
    (
        "PrefixCacheManager + KV Cache",
        ORANGE,
        # NOTE(review): same path as the scheduler layer above — confirm
        # this is intentional and not a copy-paste slip.
        "astrai/inference/scheduler.py",
        [
            "Radix Tree prefix matching · LRU eviction · Slot versioning",
            "GPU copy_() → Zero-Copy Reuse · k_cache / v_cache tensors",
        ],
    ),
    (
        "Transformer (1B params)",
        PURPLE,
        "astrai/model/transformer.py",
        [
            "24× DecoderBlock · GQA 6:1 · RoPE · SwiGLU MLP",
            "Dim 1536 · Max Length 2048 · bfloat16 · 100K vocab",
        ],
    ),
]


class Architecture(Scene):
    """Reveals AstrAI's 5-layer inference stack, introducing each layer."""

    def construct(self):
        """Play the scene: title, layer stack, highlight, call to action."""
        title = Text("AstrAI Architecture", font_size=44, color=BLUE)
        self.play(Write(title))
        self.play(title.animate.to_edge(UP, buff=0.3))

        layers = self._build_layers(title)
        arrows = self._reveal_layers(layers)
        self.wait(0.6)

        self._highlight_innovations(layers)

        # ── Fade to CTA ──
        self.play(FadeOut(VGroup(*layers)), FadeOut(arrows))
        cta = VGroup(
            Text("AstrAI", font_size=52, color=BLUE),
            Text("Single GPU · Open Source · 1B params", font_size=22, color=GRAY),
            Text("github.com/ViperEkura/AstrAI", font_size=18, color=YELLOW),
        ).arrange(DOWN, buff=0.35)
        cta.move_to(ORIGIN)
        self.play(Write(cta))
        self.wait(2.5)
        self.play(FadeOut(cta), FadeOut(title))

    @staticmethod
    def _build_layer(header_text, color, path, details):
        """Build one layer as ``VGroup(box, content)`` without animating it.

        ``content`` stacks, top to bottom, a bold header, the source-path
        line and the detail lines, centered on the box. The box is index 0
        of the returned group so callers can anchor arrows to it.
        """
        frame = Rectangle(
            width=LAYER_WIDTH,
            height=LAYER_HEIGHT,
            color=color,
            fill_opacity=0.1,
            stroke_width=1.5,
        )
        lines = [Text(header_text, font_size=20, color=color, weight=BOLD)]
        lines.extend(
            Text(line, font_size=12, color=WHITE) for line in [path, *details]
        )
        # NOTE(review): four text lines at buff=0.15 may be slightly taller
        # than LAYER_HEIGHT — check the render for overflow.
        content = VGroup(*lines).arrange(DOWN, buff=0.15)
        content.move_to(frame.get_center())
        return VGroup(frame, content)

    def _build_layers(self, title):
        """Build every layer in LAYER_SPECS and stack them below ``title``.

        Returns the layers as a list, top to bottom. Nothing is added to
        the scene here.
        """
        layers = []
        anchor, gap = title, 0.35  # the first layer hangs off the title
        for header_text, color, path, details in LAYER_SPECS:
            layer = self._build_layer(header_text, color, path, details)
            layer.next_to(anchor, DOWN, buff=gap)
            layers.append(layer)
            anchor, gap = layer, LAYER_GAP
        return layers

    def _reveal_layers(self, layers):
        """Create the layers one by one, linking each to the one above it.

        Returns a VGroup holding the connecting arrows so the caller can
        fade them out together.
        """
        arrows = VGroup()
        for index, layer in enumerate(layers):
            self.play(Create(layer), run_time=0.4)
            self.wait(1.0 if index < 2 else 0.8)
            if index == 0:
                continue
            upper_box = layers[index - 1][0]
            lower_box = layer[0]
            # The boxes sit only LAYER_GAP apart. A per-end buff of half
            # that gap (the previous 0.06) could trim the arrow to zero
            # length, so the arrow spans the whole gap instead.
            arrow = Arrow(
                upper_box.get_bottom(),
                lower_box.get_top(),
                color=GRAY,
                buff=0,
                max_tip_length_to_length_ratio=0.18,
            )
            self.play(Create(arrow), run_time=0.15)
            arrows.add(arrow)
        return arrows

    def _highlight_innovations(self, layers):
        """Outline the scheduler and cache layers with a caption, then clear."""
        hl3 = SurroundingRectangle(layers[2], color=YELLOW, buff=0.1, stroke_width=2)
        hl4 = SurroundingRectangle(layers[3], color=ORANGE, buff=0.1, stroke_width=2)
        hl_note = Text(
            "Key Innovations: Continuous Batching + Prefix Cache",
            font_size=18,
            color=GOLD,
        )
        # The stack is LAYER_WIDTH wide and centered, so a caption this long
        # placed to its LEFT runs off the default frame. Put it under the
        # stack instead.
        hl_note.next_to(VGroup(*layers), DOWN, buff=0.3)
        self.play(Create(hl3), Create(hl4), Write(hl_note))
        self.wait(2.0)
        self.play(FadeOut(hl3), FadeOut(hl4), FadeOut(hl_note))