1 changed files with 52 additions and 111 deletions
--- a/architecture.py
+++ b/architecture.py
@ -1,135 +1,76 @@
-"""AstrAI promo: 4-layer architecture — boxes left, explanations right."""
+"""AstrAI promo: Full architecture overview."""

 from manim import *


 class Architecture(Scene):
-    """Boxes on left, description text on right for each layer."""
+    """Build the AstrAI stack layer by layer, highlight the cache layer, then show the CTA."""

    def construct(self):
-        title = Text("AstrAI Architecture", font_size=42, color=BLUE)
-        title.to_edge(UP, buff=0.25)
+        title = Text("AstrAI Architecture", font_size=48, color=BLUE)
        self.play(Write(title))
+        self.wait(0.2)
+        self.play(title.animate.to_edge(UP))

-        W, BH = 5.2, 1.15
-        BX = -3.6
-        TX = 3.6
-
-        def make_box(header, color, bits, src):
-            b = Rectangle(width=W, height=BH, color=color, fill_opacity=0.1, stroke_width=1.5)
-            h = Text(header, font_size=16, color=color, weight=BOLD)
-            items = [h]
-            for line in bits:
-                items.append(Text(line, font_size=10, color=WHITE))
-            items.append(Text(src, font_size=9, color=GRAY))
-            c = VGroup(*items).arrange(DOWN, buff=0.04)
-            c.move_to(b.get_center())
-            return VGroup(b, c)
-
-        L1 = make_box("HTTP API Server", GREEN,
-                      ["FastAPI  ·  OpenAI-Compatible",
-                       "/v1/chat/completions  ·  SSE streaming"],
-                      "astrai/inference/server.py")
-
-        L2 = make_box("Inference Engine", BLUE,
-                      ["generate()  ·  batch mode  ·  streaming",
-                       "4-phase daemon: Cleanup → Refill → Prefill → Decode",
-                       "Position-grouped decode  ·  Bitmask O(1) slots"],
-                      "astrai/inference/engine.py  ·  scheduler.py")
-
-        L3 = make_box("Prefix Cache  +  KV Cache", ORANGE,
-                      ["Radix Tree prefix matching  ·  LRU eviction",
-                       "Slot versioning  ·  GPU copy_() zero-copy reuse"],
-                      "astrai/inference/scheduler.py")
-
-        L4 = make_box("Transformer Model", PURPLE,
-                      ["24× DecoderBlock  ·  GQA 6:1  ·  RoPE",
-                       "SwiGLU MLP  ·  Dim 1536  ·  bfloat16"],
-                      "astrai/model/transformer.py")
-
-        layers = VGroup(L1, L2, L3, L4)
-        layers.arrange(DOWN, buff=0.08)
-        layers.move_to([BX, 0, 0])
-        layers.next_to(title, DOWN, buff=0.25)
-
-        # Description panels (right side)
-        descs_text = [
-            ["HTTP API Server",
-             "Receives chat requests via",
-             "OpenAI-compatible endpoints.",
-             "Streams generated tokens back",
-             "through Server-Sent Events."],
-            ["Inference Engine",
-             "Orchestrates the full generation",
-             "pipeline with a background daemon.",
-             "4-phase loop: Cleanup tasks,",
-             "Refill batch, Prefill prompts,",
-             "Decode tokens one by one."],
-            ["Prefix Cache + KV Cache",
-             "Caches key-value states using",
-             "a Radix Tree for O(n) prefix lookup.",
-             "Reuses matched prefixes via GPU",
-             "memcpy — zero recomputation."],
-            ["Transformer Model (1B params)",
-             "Decoder-only Transformer with",
-             "Grouped-Query Attention (GQA 6:1).",
-             "RoPE rotary encoding, SwiGLU",
-             "activation, 100K vocabulary."],
+        layers_data = [
+            (0.9, GREEN, "API Layer", ["FastAPI Server  •  OpenAI-Compatible API"]),
+            (0.9, BLUE, "Inference Engine", ["Streaming  •  Async  •  Batch Modes"]),
+            (1.6, YELLOW, "Continuous Batching Scheduler",
+             ["Cleanup → Refill → Prefill → Decode",
+              "Position-Grouped Decode",
+              "Bitmask O(1) Slot Allocation"]),
+            (1.2, ORANGE, "Prefix Cache + KV Cache",
+             ["Radix Tree  •  Slot Versioning",
+              "GPU copy_()  →  Zero-Copy Reuse"]),
+            (1.2, PURPLE, "Transformer Model (1B params)",
+             ["24-layer GQA  •  RoPE  •  SwiGLU",
+              "bfloat16  •  100K vocab"]),
        ]

-        def make_desc(lines, color):
-            els = [Text(lines[0], font_size=20, color=color, weight=BOLD)]
-            for ln in lines[1:]:
-                els.append(Text(ln, font_size=14, color=WHITE))
-            grp = VGroup(*els).arrange(DOWN, buff=0.1, aligned_edge=LEFT)
-            return grp
+        layers = VGroup()
+        for height, color, label, subs in layers_data:
+            box = Rectangle(width=7.5, height=height, color=color, fill_opacity=0.1)
+            lbl = Text(label, font_size=18, color=color)
+            items = [lbl] + [Text(s, font_size=11, color=WHITE) for s in subs]
+            content = VGroup(*items)
+            content.arrange(DOWN, buff=0.22)
+            content.move_to(box.get_center())
+            layers.add(VGroup(box, content))

-        COLORS = [GREEN, BLUE, ORANGE, PURPLE]
-        descs = [make_desc(lns, c) for lns, c in zip(descs_text, COLORS)]
+        layers.arrange(DOWN, buff=0.18)
+        layers.next_to(title, DOWN, buff=0.3)

-        arrows = VGroup()
-        for i, (layer, desc) in enumerate(zip(layers, descs)):
-            b = layer[0]
-            self.play(Create(layer), run_time=0.35)
-            desc.next_to(b, RIGHT, buff=1.0)
-            desc.align_to(b, UP)
-            self.play(Write(desc), run_time=0.3)
-            self.wait(2.0 if i == 0 else 1.8)
-            if i < len(layers) - 1:
-                self.play(FadeOut(desc))
-                nxt = layers[i + 1][0]
+        for i in range(len(layers)):
+            self.play(Create(layers[i]), run_time=0.35)
+            if i > 0:
+                # Use box-to-box for arrow endpoints (not content)
+                prev_box = layers[i - 1][0]
+                curr_box = layers[i][0]
                arrow = Arrow(
-                    b.get_bottom(), nxt.get_top(),
-                    color=GRAY, buff=0.04,
-                    max_tip_length_to_length_ratio=0.18,
+                    prev_box.get_bottom(),
+                    curr_box.get_top(),
+                    color=GRAY,
+                    buff=0.1,
+                    max_tip_length_to_length_ratio=0.15,
                )
-                self.play(Create(arrow), run_time=0.12)
-                arrows.add(arrow)
-            else:
+                self.play(Create(arrow), run_time=0.15)
+
        self.wait(0.5)
-                self.play(FadeOut(desc))

-        # Show all boxes + arrows together briefly
-        self.wait(0.3)
+        hl = SurroundingRectangle(layers[3], color=GREEN, buff=0.12)
+        hl_note = Text("Zero-Copy Prefix Reuse", font_size=18, color=GREEN)
+        hl_note.next_to(hl, LEFT, buff=0.4)
+        self.play(Create(hl), Write(hl_note))
+        self.wait(1.5)
+        self.play(FadeOut(hl), FadeOut(hl_note))

-        # Highlight innovation layers
-        hl2 = SurroundingRectangle(L2, color=BLUE, buff=0.1, stroke_width=2)
-        hl3 = SurroundingRectangle(L3, color=ORANGE, buff=0.1, stroke_width=2)
-        hl_note = Text("Key Innovations", font_size=20, color=GOLD)
-        hl_note.next_to(VGroup(hl2, hl3), RIGHT, buff=1.5)
-        hl_note.align_to(hl2, UP)
-        self.play(Create(hl2), Create(hl3), Write(hl_note))
-        self.wait(2.0)
-        self.play(FadeOut(hl2), FadeOut(hl3), FadeOut(hl_note))
-
-        self.play(FadeOut(VGroup(*layers)), FadeOut(arrows))
+        self.play(*[FadeOut(m) for m in self.mobjects if m is not title])  # layers + arrows

        cta = VGroup(
            Text("AstrAI", font_size=52, color=BLUE),
-            Text("Single GPU  ·  Open Source  ·  1B params", font_size=22, color=GRAY),
-            Text("github.com/ViperEkura/AstrAI", font_size=18, color=YELLOW),
+            Text("Single GPU  •  Open Source  •  1B params", font_size=24, color=GRAY),
+            Text("github.com/ViperEkura/AstrAI", font_size=20, color=YELLOW),
        ).arrange(DOWN, buff=0.35)
-        cta.move_to(ORIGIN)
        self.play(Write(cta))
-        self.wait(2.5)
+        self.wait(2)
        self.play(FadeOut(cta), FadeOut(title))