video-promo/architecture.py

128 lines
5.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""AstrAI promo: 4-layer architecture — boxes left, explanations right."""
from manim import *
# Set the default font used by Text mobjects created after this call.
# NOTE(review): presumably the font must be installed on the rendering
# machine for this to take effect — confirm.
Text.set_default(font="Times New Roman")
class Architecture(Scene):
    """Animate the four-layer AstrAI stack: boxes on the left, text on the right.

    Each layer box is drawn in turn with a short description written next to
    it; the description is faded out before the next layer appears. Once all
    four boxes and their connecting arrows are on screen, the two middle
    layers are outlined and labelled "Key Innovations", and finally every
    remaining object is faded out.
    """

    def construct(self):
        """Build and play the complete animation."""
        title = Text("AstrAI Architecture", font_size=42, color=BLUE)
        title.to_edge(UP, buff=0.25)
        self.play(Write(title))

        # Box dimensions (scene units) and the x-coordinate of the box column.
        box_w, box_h = 5.2, 1.15
        box_x = -3.6

        def make_box(header, color, bits, src):
            """Return ``VGroup(frame, content)`` for a single layer.

            header -- layer name, drawn in ``color`` at the top of the box.
            color  -- colour of the frame and of the header text.
            bits   -- iterable of short feature lines drawn in white.
            src    -- source-path caption drawn in gray at the bottom.

            Index 0 of the returned group is the rectangle; callers rely on
            that to anchor descriptions and arrows.
            """
            frame = Rectangle(
                width=box_w,
                height=box_h,
                color=color,
                fill_opacity=0.1,
                stroke_width=1.5,
            )
            rows = [
                Text(header, font_size=16, color=color),
                *(Text(line, font_size=10, color=WHITE) for line in bits),
                Text(src, font_size=9, color=GRAY),
            ]
            content = VGroup(*rows).arrange(DOWN, buff=0.04)
            content.move_to(frame.get_center())
            return VGroup(frame, content)

        server_box = make_box(
            "HTTP API Server",
            GREEN,
            [
                "FastAPI · OpenAI-Compatible",
                "/v1/chat/completions · SSE streaming",
            ],
            "astrai/inference/server.py",
        )
        engine_box = make_box(
            "Inference Engine",
            BLUE,
            [
                "generate() · batch mode · streaming",
                "4-phase daemon: Cleanup → Refill → Prefill → Decode",
                "Position-grouped decode · Bitmask O(1) slots",
            ],
            "astrai/inference/engine.py · scheduler.py",
        )
        cache_box = make_box(
            "Prefix Cache + KV Cache",
            ORANGE,
            [
                "Radix Tree prefix matching · LRU eviction",
                "Slot versioning · GPU copy_() zero-copy reuse",
            ],
            "astrai/inference/scheduler.py",
        )
        model_box = make_box(
            "Transformer Model",
            PURPLE,
            [
                "24× DecoderBlock · GQA 6:1 · RoPE",
                "SwiGLU MLP · Dim 1536 · bfloat16",
            ],
            "astrai/model/transformer.py",
        )

        layers = VGroup(server_box, engine_box, cache_box, model_box)
        layers.arrange(DOWN, buff=0.08)
        # Place the stack just below the title first, then restore the column
        # x-coordinate: next_to() also re-centres the group horizontally on
        # the title, which previously cancelled the move to the left column.
        layers.next_to(title, DOWN, buff=0.25)
        layers.set_x(box_x)

        # Description panels (right side); first entry of each list is the
        # heading, the rest are body lines.
        descs_text = [
            [
                "HTTP API Server",
                "Receives chat requests via",
                "OpenAI-compatible endpoints.",
                "Streams generated tokens back",
                "through Server-Sent Events.",
            ],
            [
                "Inference Engine",
                "Orchestrates the full generation",
                "pipeline with a background daemon.",
                "4-phase loop: Cleanup tasks,",
                "Refill batch, Prefill prompts,",
                "Decode tokens one by one.",
            ],
            [
                "Prefix Cache + KV Cache",
                "Caches key-value states using",
                "a Radix Tree for O(n) prefix lookup.",
                "Reuses matched prefixes via GPU",
                "memcpy — zero recomputation.",
            ],
            [
                "Transformer Model (1B params)",
                "Decoder-only Transformer with",
                "Grouped-Query Attention (GQA 6:1).",
                "RoPE rotary encoding, SwiGLU",
                "activation, 100K vocabulary.",
            ],
        ]

        def make_desc(lines, color):
            """Return a left-aligned text column for one layer description.

            ``lines[0]`` becomes the heading (font size 20, in ``color``);
            every following entry is a white body line at font size 14.
            """
            heading, *body = lines
            parts = [
                Text(heading, font_size=20, color=color),
                *(Text(text, font_size=14, color=WHITE) for text in body),
            ]
            return VGroup(*parts).arrange(DOWN, buff=0.1, aligned_edge=LEFT)

        layer_colors = [GREEN, BLUE, ORANGE, PURPLE]
        descs = [
            make_desc(lines, color)
            for lines, color in zip(descs_text, layer_colors)
        ]

        arrows = VGroup()
        last = len(layers) - 1
        for i, (layer, desc) in enumerate(zip(layers, descs)):
            frame = layer[0]
            self.play(Create(layer), run_time=0.35)

            # Anchor the description to the right of the box, top-aligned.
            desc.next_to(frame, RIGHT, buff=1.0)
            desc.align_to(frame, UP)
            self.play(Write(desc), run_time=0.3)
            # The first layer is held slightly longer than the others.
            self.wait(2.0 if i == 0 else 1.8)

            if i == last:
                # Final layer: linger a little, then clear its description.
                self.wait(0.5)
                self.play(FadeOut(desc))
                continue

            self.play(FadeOut(desc))
            # Connector to the next box. The boxes are stacked only 0.08
            # apart, so these arrows are very short.
            # NOTE(review): with buff=0.04 at each end the visible shaft may
            # be close to zero length — confirm in a render.
            below = layers[i + 1][0]
            arrow = Arrow(
                frame.get_bottom(),
                below.get_top(),
                color=GRAY,
                buff=0.04,
                max_tip_length_to_length_ratio=0.18,
            )
            self.play(Create(arrow), run_time=0.12)
            arrows.add(arrow)

        # Show all boxes + arrows together briefly
        self.wait(0.3)

        # Highlight innovation layers
        hl_engine = SurroundingRectangle(
            engine_box, color=BLUE, buff=0.1, stroke_width=2
        )
        hl_cache = SurroundingRectangle(
            cache_box, color=ORANGE, buff=0.1, stroke_width=2
        )
        hl_note = Text("Key Innovations", font_size=20, color=GOLD)
        hl_note.next_to(VGroup(hl_engine, hl_cache), RIGHT, buff=1.5)
        hl_note.align_to(hl_engine, UP)
        self.play(Create(hl_engine), Create(hl_cache), Write(hl_note))
        self.wait(2.0)
        self.play(FadeOut(hl_engine), FadeOut(hl_cache), FadeOut(hl_note))

        # Clear everything that is still on screen.
        self.play(FadeOut(VGroup(*layers)), FadeOut(arrows), FadeOut(title))