diff --git a/architecture.py b/architecture.py
index 56de9dc..c3ad35b 100644
--- a/architecture.py
+++ b/architecture.py
@@ -1,76 +1,125 @@
-"""AstrAI promo: Full architecture overview."""
+"""AstrAI promo: Full architecture overview — layer by layer introduction."""
 
 from manim import *
 
 
 class Architecture(Scene):
-    """Animates the full AstrAI system stack layer by layer."""
+    """Reveals AstrAI's 5-layer inference stack, introducing each layer."""
 
     def construct(self):
-        title = Text("AstrAI Architecture", font_size=48, color=BLUE)
+        title = Text("AstrAI Architecture", font_size=44, color=BLUE)
         self.play(Write(title))
-        self.wait(0.2)
-        self.play(title.animate.to_edge(UP))
+        self.play(title.animate.to_edge(UP, buff=0.3))
 
-        layers_data = [
-            (0.9, GREEN, "API Layer", ["FastAPI Server  •  OpenAI-Compatible API"]),
-            (0.9, BLUE, "Inference Engine", ["Streaming  •  Async  •  Batch Modes"]),
-            (1.6, YELLOW, "Continuous Batching Scheduler",
-             ["Cleanup → Refill → Prefill → Decode",
-              "Position-Grouped Decode",
-              "Bitmask O(1) Slot Allocation"]),
-            (1.2, ORANGE, "Prefix Cache + KV Cache",
-             ["Radix Tree  •  Slot Versioning",
-              "GPU copy_()  →  Zero-Copy Reuse"]),
-            (1.2, PURPLE, "Transformer Model (1B params)",
-             ["24-layer GQA  •  RoPE  •  SwiGLU",
-              "bfloat16  •  100K vocab"]),
-        ]
+        W = 8.0
 
-        layers = VGroup()
-        for height, color, label, subs in layers_data:
-            box = Rectangle(width=7.5, height=height, color=color, fill_opacity=0.1)
-            lbl = Text(label, font_size=18, color=color)
-            items = [lbl] + [Text(s, font_size=11, color=WHITE) for s in subs]
-            content = VGroup(*items)
-            content.arrange(DOWN, buff=0.22)
-            content.move_to(box.get_center())
-            layers.add(VGroup(box, content))
+        def box(h=1.05, color=GRAY, fill=0.08):
+            return Rectangle(width=W, height=h, color=color, fill_opacity=fill, stroke_width=1.5)
 
-        layers.arrange(DOWN, buff=0.18)
-        layers.next_to(title, DOWN, buff=0.3)
+        def layer_header(text, color):
+            return Text(text, font_size=20, color=color, weight=BOLD)
 
-        for i in range(len(layers)):
-            self.play(Create(layers[i]), run_time=0.35)
+        def sub(text):
+            return Text(text, font_size=12, color=WHITE)
+
+        def intro(title, detail, color, oneline=None):
+            """Build one layer group: a tinted box with stacked text inside.
+
+            ``title`` comes first, then ``oneline`` (if given), then every
+            mobject in ``detail`` (if any). Nothing is animated here; the
+            caller positions and plays the returned ``VGroup(box, content)``.
+            """
+            b = box(color=color, fill=0.1)
+            items = [title]
+            if oneline:
+                items.append(oneline)
+            if detail:
+                items.extend(detail)
+            content = VGroup(*items)
+            content.arrange(DOWN, buff=0.15)
+            content.move_to(b.get_center())
+            # Index 0 must stay the box: the arrow loop reads layer[0] for endpoints.
+            return VGroup(b, content)
+
+        layers = []
+
+        # ── Layer 1: API Server ──
+        l1_t = layer_header("HTTP API Server", GREEN)
+        l1_d = [sub("FastAPI  •  OpenAI-Compatible  /v1/chat/completions"),
+                sub("Streaming SSE  •  Async  •  Health/Stats Endpoints")]
+        l1 = intro(l1_t, l1_d, GREEN, sub("astrai/inference/server.py"))
+        l1.next_to(title, DOWN, buff=0.35)
+        layers.append(l1)
+
+        # ── Layer 2: Inference Engine ──
+        l2_t = layer_header("InferenceEngine", BLUE)
+        l2_d = [sub("generate()  ·  generate_async()  ·  generate_with_request()"),
+                sub("Batch mode  ·  Streaming (Generator)  ·  Thread-safe accumulator")]
+        l2 = intro(l2_t, l2_d, BLUE, sub("astrai/inference/engine.py"))
+        l2.next_to(l1, DOWN, buff=0.12)
+        layers.append(l2)
+
+        # ── Layer 3: Continuous Batching Scheduler ──
+        l3_t = layer_header("InferenceScheduler  (Background Daemon)", YELLOW)
+        l3_d = [sub("Cleanup → Refill → Prefill → Decode  ·  4-phase loop"),
+                sub("Position-Grouped Decode  ·  Bitmask O(1) Slot Allocation")]
+        l3 = intro(l3_t, l3_d, YELLOW, sub("astrai/inference/scheduler.py"))
+        l3.next_to(l2, DOWN, buff=0.12)
+        layers.append(l3)
+
+        # ── Layer 4: Prefix Cache + KV Cache ──
+        l4_t = layer_header("PrefixCacheManager  +  KV Cache", ORANGE)
+        l4_d = [sub("Radix Tree prefix matching  ·  LRU eviction  ·  Slot versioning"),
+                sub("GPU copy_() → Zero-Copy Reuse  ·  k_cache / v_cache tensors")]
+        l4 = intro(l4_t, l4_d, ORANGE, sub("astrai/inference/scheduler.py"))
+        l4.next_to(l3, DOWN, buff=0.12)
+        layers.append(l4)
+
+        # ── Layer 5: Transformer Model ──
+        l5_t = layer_header("Transformer  (1B params)", PURPLE)
+        l5_d = [sub("24× DecoderBlock  ·  GQA 6:1  ·  RoPE  ·  SwiGLU MLP"),
+                sub("Dim 1536  ·  Max Length 2048  ·  bfloat16  ·  100K vocab")]
+        l5 = intro(l5_t, l5_d, PURPLE, sub("astrai/model/transformer.py"))
+        l5.next_to(l4, DOWN, buff=0.12)
+        layers.append(l5)
+
+        # ── Animate layer by layer ──
+        arrows = VGroup()
+        for i, layer in enumerate(layers):
+            self.play(Create(layer), run_time=0.4)
+            self.wait(1.0 if i < 2 else 0.8)
             if i > 0:
-                # Use box-to-box for arrow endpoints (not content)
-                prev_box = layers[i - 1][0]
-                curr_box = layers[i][0]
+                prev = layers[i - 1][0]
+                curr = layer[0]
                 arrow = Arrow(
-                    prev_box.get_bottom(),
-                    curr_box.get_top(),
-                    color=GRAY,
-                    buff=0.1,
-                    max_tip_length_to_length_ratio=0.15,
+                    prev.get_bottom(), curr.get_top(),
+                    color=GRAY, buff=0.06,
+                    max_tip_length_to_length_ratio=0.18,
                 )
                 self.play(Create(arrow), run_time=0.15)
+                arrows.add(arrow)
 
-        self.wait(0.5)
+        self.wait(0.6)
 
-        hl = SurroundingRectangle(layers[3], color=GREEN, buff=0.12)
-        hl_note = Text("Zero-Copy Prefix Reuse", font_size=18, color=GREEN)
-        hl_note.next_to(hl, LEFT, buff=0.4)
-        self.play(Create(hl), Write(hl_note))
-        self.wait(1.5)
-        self.play(FadeOut(hl), FadeOut(hl_note))
+        # ── Highlight: the innovation layers ──
+        hl3 = SurroundingRectangle(layers[2], color=YELLOW, buff=0.1, stroke_width=2)
+        hl4 = SurroundingRectangle(layers[3], color=ORANGE, buff=0.1, stroke_width=2)
+        hl_note = Text("Key Innovations: Continuous Batching  +  Prefix Cache",
+                       font_size=18, color=GOLD)
+        hl_note.next_to(layers[-1], DOWN, buff=0.3)  # boxes are W=8.0 wide: no room to the LEFT
+        self.play(Create(hl3), Create(hl4), Write(hl_note))
+        self.wait(2.0)
+        self.play(FadeOut(hl3), FadeOut(hl4), FadeOut(hl_note))
 
-        self.play(FadeOut(layers))
+        # ── Fade to CTA ──
+        self.play(FadeOut(VGroup(*layers)), FadeOut(arrows))
 
         cta = VGroup(
             Text("AstrAI", font_size=52, color=BLUE),
-            Text("Single GPU  •  Open Source  •  1B params", font_size=24, color=GRAY),
-            Text("github.com/ViperEkura/AstrAI", font_size=20, color=YELLOW),
+            Text("Single GPU  ·  Open Source  ·  1B params", font_size=22, color=GRAY),
+            Text("github.com/ViperEkura/AstrAI", font_size=18, color=YELLOW),
         ).arrange(DOWN, buff=0.35)
+        cta.move_to(ORIGIN)
         self.play(Write(cta))
-        self.wait(2)
+        self.wait(2.5)
         self.play(FadeOut(cta), FadeOut(title))