From 496f964979af5d39ce53e469fa23e3957a2d478e Mon Sep 17 00:00:00 2001
From: ViperEkura <3081035982@qq.com>
Date: Thu, 7 May 2026 12:18:33 +0800
Subject: [PATCH] =?UTF-8?q?refactor:=20architecture=20=E2=80=94=20boxes=20?=
 =?UTF-8?q?left,=20descriptions=20right,=204-layer=20layout?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 architecture.py | 188 +++++++++++++++++++++++++-----------------------
 1 file changed, 99 insertions(+), 89 deletions(-)

diff --git a/architecture.py b/architecture.py
index c3ad35b..263ad3f 100644
--- a/architecture.py
+++ b/architecture.py
@@ -1,117 +1,127 @@
-"""AstrAI promo: Full architecture overview — layer by layer introduction."""
+"""AstrAI promo: 4-layer architecture — boxes left, explanations right."""
 
 from manim import *
 
 
 class Architecture(Scene):
-    """Reveals AstrAI's 5-layer inference stack, introducing each layer."""
+    """Boxes on left, description text on right for each layer."""
 
     def construct(self):
-        title = Text("AstrAI Architecture", font_size=44, color=BLUE)
+        title = Text("AstrAI Architecture", font_size=42, color=BLUE)
+        title.to_edge(UP, buff=0.25)
         self.play(Write(title))
-        self.play(title.animate.to_edge(UP, buff=0.3))
 
-        W = 8.0
+        W, BH = 5.2, 1.15
+        BX = -3.6
+        TX = 3.6
 
-        def box(h=1.05, color=GRAY, fill=0.08):
-            return Rectangle(width=W, height=h, color=color, fill_opacity=fill, stroke_width=1.5)
+        def make_box(header, color, bits, src):  # one layer card: bold header, detail lines, source path
+            b = Rectangle(width=W, height=BH, color=color, fill_opacity=0.1, stroke_width=1.5)  # frame, sized by W/BH
+            h = Text(header, font_size=16, color=color, weight=BOLD)
+            items = [h]  # header first so it ends up on top after arrange(DOWN)
+            for line in bits:
+                items.append(Text(line, font_size=10, color=WHITE))  # one small white line per entry in bits
+            items.append(Text(src, font_size=9, color=GRAY))  # src goes last, in gray
+            c = VGroup(*items).arrange(DOWN, buff=0.04)  # stack the text top-to-bottom
+            c.move_to(b.get_center())  # centre the text stack on the frame
+            return VGroup(b, c)  # index 0 is the frame, index 1 the text stack
 
-        def layer_header(text, color):
-            return Text(text, font_size=20, color=color, weight=BOLD)
+        L1 = make_box("HTTP API Server", GREEN,
+                      ["FastAPI  ·  OpenAI-Compatible",
+                       "/v1/chat/completions  ·  SSE streaming"],
+                      "astrai/inference/server.py")
 
-        def sub(text):
-            return Text(text, font_size=12, color=WHITE)
+        L2 = make_box("Inference Engine", BLUE,
+                      ["generate()  ·  batch mode  ·  streaming",
+                       "4-phase daemon: Cleanup → Refill → Prefill → Decode",
+                       "Position-grouped decode  ·  Bitmask O(1) slots"],
+                      "astrai/inference/engine.py  ·  scheduler.py")
 
-        def intro(title, detail, color, oneline=None):
-            """Animate a layer: box + title → details → brief pause."""
-            b = box(color=color, fill=0.1)
-            content = VGroup(title)
-            if oneline:
-                content.add(oneline)
-            if detail:
-                items = [title]
-                if oneline:
-                    items.append(oneline)
-                items.extend(detail)
-                content = VGroup(*items)
-            else:
-                content = VGroup(title) if not oneline else VGroup(title, oneline)
-            content.arrange(DOWN, buff=0.15)
-            content.move_to(b.get_center())
-            grp = VGroup(b, content)
+        L3 = make_box("Prefix Cache  +  KV Cache", ORANGE,
+                      ["Radix Tree prefix matching  ·  LRU eviction",
+                       "Slot versioning  ·  GPU copy_() zero-copy reuse"],
+                      "astrai/inference/scheduler.py")
+
+        L4 = make_box("Transformer Model", PURPLE,
+                      ["24× DecoderBlock  ·  GQA 6:1  ·  RoPE",
+                       "SwiGLU MLP  ·  Dim 1536  ·  bfloat16"],
+                      "astrai/model/transformer.py")
+
+        layers = VGroup(L1, L2, L3, L4)
+        layers.arrange(DOWN, buff=0.08)
+        layers.next_to(title, DOWN, buff=0.25)  # vertical placement; this also re-centres x on the title
+        layers.set_x(BX)  # pin the column to the left afterwards, so next_to cannot undo it
+
+        # Description panels (right side)
+        descs_text = [
+            ["HTTP API Server",
+             "Receives chat requests via",
+             "OpenAI-compatible endpoints.",
+             "Streams generated tokens back",
+             "through Server-Sent Events."],
+            ["Inference Engine",
+             "Orchestrates the full generation",
+             "pipeline with a background daemon.",
+             "4-phase loop: Cleanup tasks,",
+             "Refill batch, Prefill prompts,",
+             "Decode tokens one by one."],
+            ["Prefix Cache + KV Cache",
+             "Caches key-value states using",
+             "a Radix Tree for O(n) prefix lookup.",
+             "Reuses matched prefixes via GPU",
+             "memcpy — zero recomputation."],
+            ["Transformer Model (1B params)",
+             "Decoder-only Transformer with",
+             "Grouped-Query Attention (GQA 6:1).",
+             "RoPE rotary encoding, SwiGLU",
+             "activation, 100K vocabulary."],
+        ]
+
+        def make_desc(lines, color):  # description panel: lines[0] is the heading, the rest is body text
+            els = [Text(lines[0], font_size=20, color=color, weight=BOLD)]  # heading in the layer's colour
+            for ln in lines[1:]:
+                els.append(Text(ln, font_size=14, color=WHITE))  # body lines in white
+            grp = VGroup(*els).arrange(DOWN, buff=0.1, aligned_edge=LEFT)  # stacked downward, left edges aligned
             return grp
 
-        layers = []
+        COLORS = [GREEN, BLUE, ORANGE, PURPLE]
+        descs = [make_desc(lns, c) for lns, c in zip(descs_text, COLORS)]
 
-        # ── Layer 1: API Server ──
-        l1_t = layer_header("HTTP API Server", GREEN)
-        l1_d = [sub("FastAPI  •  OpenAI-Compatible  /v1/chat/completions"),
-                sub("Streaming SSE  •  Async  •  Health/Stats Endpoints")]
-        l1 = intro(l1_t, l1_d, GREEN, sub("astrai/inference/server.py"))
-        l1.next_to(title, DOWN, buff=0.35)
-        layers.append(l1)
-
-        # ── Layer 2: Inference Engine ──
-        l2_t = layer_header("InferenceEngine", BLUE)
-        l2_d = [sub("generate()  ·  generate_async()  ·  generate_with_request()"),
-                sub("Batch mode  ·  Streaming (Generator)  ·  Thread-safe accumulator")]
-        l2 = intro(l2_t, l2_d, BLUE, sub("astrai/inference/engine.py"))
-        l2.next_to(l1, DOWN, buff=0.12)
-        layers.append(l2)
-
-        # ── Layer 3: Continuous Batching Scheduler ──
-        l3_t = layer_header("InferenceScheduler  (Background Daemon)", YELLOW)
-        l3_d = [sub("Cleanup → Refill → Prefill → Decode  ·  4-phase loop"),
-                sub("Position-Grouped Decode  ·  Bitmask O(1) Slot Allocation")]
-        l3 = intro(l3_t, l3_d, YELLOW, sub("astrai/inference/scheduler.py"))
-        l3.next_to(l2, DOWN, buff=0.12)
-        layers.append(l3)
-
-        # ── Layer 4: Prefix Cache + KV Cache ──
-        l4_t = layer_header("PrefixCacheManager  +  KV Cache", ORANGE)
-        l4_d = [sub("Radix Tree prefix matching  ·  LRU eviction  ·  Slot versioning"),
-                sub("GPU copy_() → Zero-Copy Reuse  ·  k_cache / v_cache tensors")]
-        l4 = intro(l4_t, l4_d, ORANGE, sub("astrai/inference/scheduler.py"))
-        l4.next_to(l3, DOWN, buff=0.12)
-        layers.append(l4)
-
-        # ── Layer 5: Transformer Model ──
-        l5_t = layer_header("Transformer  (1B params)", PURPLE)
-        l5_d = [sub("24× DecoderBlock  ·  GQA 6:1  ·  RoPE  ·  SwiGLU MLP"),
-                sub("Dim 1536  ·  Max Length 2048  ·  bfloat16  ·  100K vocab")]
-        l5 = intro(l5_t, l5_d, PURPLE, sub("astrai/model/transformer.py"))
-        l5.next_to(l4, DOWN, buff=0.12)
-        layers.append(l5)
-
-        # ── Animate layer by layer ──
         arrows = VGroup()
-        for i, layer in enumerate(layers):
-            self.play(Create(layer), run_time=0.4)
-            self.wait(1.0 if i < 2 else 0.8)
-            if i > 0:
-                prev = layers[i - 1][0]
-                curr = layer[0]
+        for i, (layer, desc) in enumerate(zip(layers, descs)):
+            b = layer[0]
+            self.play(Create(layer), run_time=0.35)
+            desc.next_to(b, RIGHT, buff=1.0)
+            desc.align_to(b, UP)
+            self.play(Write(desc), run_time=0.3)
+            self.wait(2.0 if i == 0 else 1.8)
+            if i < len(layers) - 1:
+                self.play(FadeOut(desc))
+                nxt = layers[i + 1][0]
                 arrow = Arrow(
-                    prev.get_bottom(), curr.get_top(),
-                    color=GRAY, buff=0.06,
+                    b.get_bottom(), nxt.get_top(),
+                    color=GRAY, buff=0.04,
                     max_tip_length_to_length_ratio=0.18,
                 )
-                self.play(Create(arrow), run_time=0.15)
+                self.play(Create(arrow), run_time=0.12)
                 arrows.add(arrow)
+            else:
+                self.wait(0.5)
+                self.play(FadeOut(desc))
 
-        self.wait(0.6)
+        # Show all boxes + arrows together briefly
+        self.wait(0.3)
 
-        # ── Highlight: the innovation layers ──
-        hl3 = SurroundingRectangle(layers[2], color=YELLOW, buff=0.1, stroke_width=2)
-        hl4 = SurroundingRectangle(layers[3], color=ORANGE, buff=0.1, stroke_width=2)
-        hl_note = Text("Key Innovations: Continuous Batching  +  Prefix Cache",
-                       font_size=18, color=GOLD)
-        hl_note.next_to(VGroup(hl3, hl4), LEFT, buff=0.5)
-        self.play(Create(hl3), Create(hl4), Write(hl_note))
+        # Highlight innovation layers
+        hl2 = SurroundingRectangle(L2, color=BLUE, buff=0.1, stroke_width=2)
+        hl3 = SurroundingRectangle(L3, color=ORANGE, buff=0.1, stroke_width=2)
+        hl_note = Text("Key Innovations", font_size=20, color=GOLD)
+        hl_note.next_to(VGroup(hl2, hl3), RIGHT, buff=1.5)
+        hl_note.align_to(hl2, UP)
+        self.play(Create(hl2), Create(hl3), Write(hl_note))
         self.wait(2.0)
-        self.play(FadeOut(hl3), FadeOut(hl4), FadeOut(hl_note))
+        self.play(FadeOut(hl2), FadeOut(hl3), FadeOut(hl_note))
 
-        # ── Fade to CTA ──
         self.play(FadeOut(VGroup(*layers)), FadeOut(arrows))
 
         cta = VGroup(