refactor: architecture layer-by-layer introduction with source file refs
This commit is contained in:
parent
e7d736a3b0
commit
4d96a84fc5
149
architecture.py
149
architecture.py
"""AstrAI promo: Full architecture overview — layer by layer introduction."""

from manim import *


class Architecture(Scene):
    """Reveals AstrAI's 5-layer inference stack, introducing each layer."""

    def construct(self):
        """Play the scene: title, five stacked layers, highlight, then CTA.

        Each layer is a coloured box holding a bold header, the source file
        that implements it, and two detail lines. Layers are created top to
        bottom and joined by small arrows; the scheduler and cache layers
        are then highlighted before everything fades into the closing card.
        """
        title = Text("AstrAI Architecture", font_size=44, color=BLUE)
        self.play(Write(title))
        self.wait(0.2)
        self.play(title.animate.to_edge(UP, buff=0.3))

        layer_width = 8.0

        def box(h=1.05, color=GRAY, fill=0.08):
            """Return the outline rectangle used as a layer background."""
            return Rectangle(
                width=layer_width,
                height=h,
                color=color,
                fill_opacity=fill,
                stroke_width=1.5,
            )

        def layer_header(text, color):
            """Return the bold heading text of a layer."""
            return Text(text, font_size=20, color=color, weight=BOLD)

        def sub(text):
            """Return a small white line of secondary text."""
            return Text(text, font_size=12, color=WHITE)

        def intro(header, detail, color, oneline=None):
            """Build one layer: a box with its text stacked and centred inside.

            Nothing is animated here; the caller positions and plays the
            returned group.

            Args:
                header: Heading mobject placed first.
                detail: List of detail mobjects placed last (may be empty).
                color: Stroke/fill colour of the box.
                oneline: Optional mobject shown between header and details.

            Returns:
                ``VGroup(box, content)`` — index 0 is the box, which the
                arrow code below relies on.
            """
            frame = box(color=color, fill=0.1)
            items = [header]
            # Compare against None: Mobject truthiness is length-based, so a
            # plain ``if oneline`` would silently drop an empty mobject.
            if oneline is not None:
                items.append(oneline)
            items.extend(detail or [])
            content = VGroup(*items)
            content.arrange(DOWN, buff=0.15)
            content.move_to(frame.get_center())
            return VGroup(frame, content)

        # (colour, header, source file, detail lines), listed top to bottom.
        # NOTE(review): layers 3 and 4 both cite scheduler.py — confirm the
        # prefix-cache code really lives in that file.
        layer_specs = [
            (
                GREEN,
                "HTTP API Server",
                "astrai/inference/server.py",
                [
                    "FastAPI • OpenAI-Compatible /v1/chat/completions",
                    "Streaming SSE • Async • Health/Stats Endpoints",
                ],
            ),
            (
                BLUE,
                "InferenceEngine",
                "astrai/inference/engine.py",
                [
                    "generate() · generate_async() · generate_with_request()",
                    "Batch mode · Streaming (Generator) · Thread-safe accumulator",
                ],
            ),
            (
                YELLOW,
                "InferenceScheduler (Background Daemon)",
                "astrai/inference/scheduler.py",
                [
                    "Cleanup → Refill → Prefill → Decode · 4-phase loop",
                    "Position-Grouped Decode · Bitmask O(1) Slot Allocation",
                ],
            ),
            (
                ORANGE,
                "PrefixCacheManager + KV Cache",
                "astrai/inference/scheduler.py",
                [
                    "Radix Tree prefix matching · LRU eviction · Slot versioning",
                    "GPU copy_() → Zero-Copy Reuse · k_cache / v_cache tensors",
                ],
            ),
            (
                PURPLE,
                "Transformer (1B params)",
                "astrai/model/transformer.py",
                [
                    "24× DecoderBlock · GQA 6:1 · RoPE · SwiGLU MLP",
                    "Dim 1536 · Max Length 2048 · bfloat16 · 100K vocab",
                ],
            ),
        ]

        # The first layer hangs below the title; every later layer hangs
        # below its predecessor with a tighter gap.
        layers = []
        anchor, gap = title, 0.35
        for color, name, source_file, details in layer_specs:
            layer = intro(
                layer_header(name, color),
                [sub(line) for line in details],
                color,
                sub(source_file),
            )
            layer.next_to(anchor, DOWN, buff=gap)
            layers.append(layer)
            anchor, gap = layer, 0.12

        # ── Animate layer by layer ──
        arrows = VGroup()
        for i, layer in enumerate(layers):
            self.play(Create(layer), run_time=0.4)
            self.wait(1.0 if i < 2 else 0.8)
            if i > 0:
                # Connect box to box (index 0 of each layer), not the text.
                prev = layers[i - 1][0]
                curr = layer[0]
                # NOTE(review): layers are stacked with buff=0.12 and the
                # arrow trims 0.06 at each end, which presumably leaves
                # almost no shaft — confirm the rendered arrows are visible.
                arrow = Arrow(
                    prev.get_bottom(),
                    curr.get_top(),
                    color=GRAY,
                    buff=0.06,
                    max_tip_length_to_length_ratio=0.18,
                )
                self.play(Create(arrow), run_time=0.15)
                arrows.add(arrow)

        self.wait(0.6)

        # ── Highlight: the innovation layers ──
        hl3 = SurroundingRectangle(layers[2], color=YELLOW, buff=0.1, stroke_width=2)
        hl4 = SurroundingRectangle(layers[3], color=ORANGE, buff=0.1, stroke_width=2)
        hl_note = Text(
            "Key Innovations: Continuous Batching + Prefix Cache",
            font_size=18,
            color=GOLD,
        )
        hl_note.next_to(VGroup(hl3, hl4), LEFT, buff=0.5)
        self.play(Create(hl3), Create(hl4), Write(hl_note))
        self.wait(2.0)
        self.play(FadeOut(hl3), FadeOut(hl4), FadeOut(hl_note))

        # ── Fade to CTA ──
        self.play(FadeOut(VGroup(*layers)), FadeOut(arrows))

        cta = VGroup(
            Text("AstrAI", font_size=52, color=BLUE),
            Text("Single GPU · Open Source · 1B params", font_size=22, color=GRAY),
            Text("github.com/ViperEkura/AstrAI", font_size=18, color=YELLOW),
        ).arrange(DOWN, buff=0.35)
        cta.move_to(ORIGIN)
        self.play(Write(cta))
        self.wait(2.5)
        self.play(FadeOut(cta), FadeOut(title))
Loading…
Reference in New Issue