"""AstrAI promo: Full architecture overview — layer by layer introduction."""

from manim import *

# Geometry shared by every layer card, in Manim scene units.
_CARD_WIDTH = 8.0
_CARD_HEIGHT = 1.05

# Vertical gap between the scene title and the first card, then between cards.
_TITLE_GAP = 0.35
_CARD_GAP = 0.12

# One entry per layer, listed top to bottom:
# (header text, accent color, source path caption, detail captions).
_LAYER_SPECS = (
    (
        "HTTP API Server",
        GREEN,
        "astrai/inference/server.py",
        (
            "FastAPI • OpenAI-Compatible /v1/chat/completions",
            "Streaming SSE • Async • Health/Stats Endpoints",
        ),
    ),
    (
        "InferenceEngine",
        BLUE,
        "astrai/inference/engine.py",
        (
            "generate() · generate_async() · generate_with_request()",
            "Batch mode · Streaming (Generator) · Thread-safe accumulator",
        ),
    ),
    (
        "InferenceScheduler (Background Daemon)",
        YELLOW,
        "astrai/inference/scheduler.py",
        (
            "Cleanup → Refill → Prefill → Decode · 4-phase loop",
            "Position-Grouped Decode · Bitmask O(1) Slot Allocation",
        ),
    ),
    (
        "PrefixCacheManager + KV Cache",
        ORANGE,
        # NOTE(review): same path as the scheduler layer above — confirm this
        # is intended and not a copy-paste leftover.
        "astrai/inference/scheduler.py",
        (
            "Radix Tree prefix matching · LRU eviction · Slot versioning",
            "GPU copy_() → Zero-Copy Reuse · k_cache / v_cache tensors",
        ),
    ),
    (
        "Transformer (1B params)",
        PURPLE,
        "astrai/model/transformer.py",
        (
            "24× DecoderBlock · GQA 6:1 · RoPE · SwiGLU MLP",
            "Dim 1536 · Max Length 2048 · bfloat16 · 100K vocab",
        ),
    ),
)


class Architecture(Scene):
    """Reveals AstrAI's 5-layer inference stack, introducing each layer."""

    def construct(self):
        """Play the whole clip: title, layer stack, highlight, call to action."""
        title = Text("AstrAI Architecture", font_size=44, color=BLUE)
        self.play(Write(title))
        self.play(title.animate.to_edge(UP, buff=0.3))

        # Cards are laid out only after the title has reached the top edge,
        # because the first card is positioned relative to it.
        layers = self._build_layers(title)
        arrows = self._reveal_layers(layers)
        self.wait(0.6)

        # Layers 3 and 4 (scheduler and prefix/KV cache) get the highlight.
        self._highlight_innovations(layers[2], layers[3])

        self.play(FadeOut(VGroup(*layers)), FadeOut(arrows))
        self._show_cta(title)

    @staticmethod
    def _layer_card(header, color, path=None, details=()):
        """Build one layer card; nothing is animated here.

        Args:
            header: Bold heading text, drawn in ``color``.
            color: Stroke/fill color of the box and color of the heading.
            path: Optional caption placed directly under the heading.
            details: Caption strings placed under ``path``, in order.

        Returns:
            ``VGroup(box, content)`` — index 0 is the rectangle, index 1 the
            vertically arranged text, centered on the rectangle.
        """
        frame = Rectangle(
            width=_CARD_WIDTH,
            height=_CARD_HEIGHT,
            color=color,
            fill_opacity=0.1,
            stroke_width=1.5,
        )
        captions = ([path] if path else []) + list(details)
        rows = [Text(header, font_size=20, color=color, weight=BOLD)]
        rows.extend(Text(line, font_size=12, color=WHITE) for line in captions)

        content = VGroup(*rows).arrange(DOWN, buff=0.15)
        content.move_to(frame.get_center())
        return VGroup(frame, content)

    def _build_layers(self, anchor):
        """Create every card in ``_LAYER_SPECS`` and stack them below ``anchor``.

        Returns:
            List of card groups, top to bottom, already positioned but not yet
            added to the scene.
        """
        layers = []
        above, gap = anchor, _TITLE_GAP
        for header, color, path, details in _LAYER_SPECS:
            card = self._layer_card(header, color, path, details)
            card.next_to(above, DOWN, buff=gap)
            layers.append(card)
            above, gap = card, _CARD_GAP
        return layers

    def _reveal_layers(self, layers):
        """Draw the cards one at a time, linking each to the one above it.

        Returns:
            ``VGroup`` holding every connector arrow that was drawn.
        """
        arrows = VGroup()
        previous = None
        for index, layer in enumerate(layers):
            self.play(Create(layer), run_time=0.4)
            # The first two layers are held on screen slightly longer.
            self.wait(1.0 if index < 2 else 0.8)
            if previous is not None:
                # Connect box to box (element 0 of each card), not text.
                # NOTE(review): the cards sit only _CARD_GAP apart and the
                # arrow is trimmed by buff at both ends, so it may render very
                # short — confirm visually.
                arrow = Arrow(
                    previous[0].get_bottom(),
                    layer[0].get_top(),
                    color=GRAY,
                    buff=0.06,
                    max_tip_length_to_length_ratio=0.18,
                )
                self.play(Create(arrow), run_time=0.15)
                arrows.add(arrow)
            previous = layer
        return arrows

    def _highlight_innovations(self, scheduler_layer, cache_layer):
        """Outline the two given layers, show a caption, then clear both."""
        scheduler_box = SurroundingRectangle(
            scheduler_layer, color=YELLOW, buff=0.1, stroke_width=2
        )
        cache_box = SurroundingRectangle(
            cache_layer, color=ORANGE, buff=0.1, stroke_width=2
        )
        note = Text(
            "Key Innovations: Continuous Batching + Prefix Cache",
            font_size=18,
            color=GOLD,
        )
        # NOTE(review): the cards are _CARD_WIDTH wide, so little horizontal
        # room is left beside them; this caption may extend past the left
        # frame edge — confirm in a render.
        note.next_to(VGroup(scheduler_box, cache_box), LEFT, buff=0.5)

        self.play(Create(scheduler_box), Create(cache_box), Write(note))
        self.wait(2.0)
        self.play(FadeOut(scheduler_box), FadeOut(cache_box), FadeOut(note))

    def _show_cta(self, title):
        """Show the closing call to action, then fade it out with ``title``."""
        cta = VGroup(
            Text("AstrAI", font_size=52, color=BLUE),
            Text("Single GPU · Open Source · 1B params", font_size=22, color=GRAY),
            Text("github.com/ViperEkura/AstrAI", font_size=18, color=YELLOW),
        ).arrange(DOWN, buff=0.35)
        cta.move_to(ORIGIN)

        self.play(Write(cta))
        self.wait(2.5)
        self.play(FadeOut(cta), FadeOut(title))