Compare commits
No commits in common. "496f964979af5d39ce53e469fa23e3957a2d478e" and "e7d736a3b008809af4ffb35880367e74af3f667c" have entirely different histories.
496f964979
...
e7d736a3b0
163
architecture.py
163
architecture.py
|
|
@ -1,135 +1,76 @@
|
|||
"""AstrAI promo: 4-layer architecture — boxes left, explanations right."""
|
||||
"""AstrAI promo: Full architecture overview."""
|
||||
|
||||
from manim import *
|
||||
|
||||
|
||||
class Architecture(Scene):
|
||||
"""Boxes on left, description text on right for each layer."""
|
||||
"""Animates the full AstrAI system stack layer by layer."""
|
||||
|
||||
def construct(self):
|
||||
title = Text("AstrAI Architecture", font_size=42, color=BLUE)
|
||||
title.to_edge(UP, buff=0.25)
|
||||
title = Text("AstrAI Architecture", font_size=48, color=BLUE)
|
||||
self.play(Write(title))
|
||||
self.wait(0.2)
|
||||
self.play(title.animate.to_edge(UP))
|
||||
|
||||
W, BH = 5.2, 1.15
|
||||
BX = -3.6
|
||||
TX = 3.6
|
||||
|
||||
def make_box(header, color, bits, src):
|
||||
b = Rectangle(width=W, height=BH, color=color, fill_opacity=0.1, stroke_width=1.5)
|
||||
h = Text(header, font_size=16, color=color, weight=BOLD)
|
||||
items = [h]
|
||||
for line in bits:
|
||||
items.append(Text(line, font_size=10, color=WHITE))
|
||||
items.append(Text(src, font_size=9, color=GRAY))
|
||||
c = VGroup(*items).arrange(DOWN, buff=0.04)
|
||||
c.move_to(b.get_center())
|
||||
return VGroup(b, c)
|
||||
|
||||
L1 = make_box("HTTP API Server", GREEN,
|
||||
["FastAPI · OpenAI-Compatible",
|
||||
"/v1/chat/completions · SSE streaming"],
|
||||
"astrai/inference/server.py")
|
||||
|
||||
L2 = make_box("Inference Engine", BLUE,
|
||||
["generate() · batch mode · streaming",
|
||||
"4-phase daemon: Cleanup → Refill → Prefill → Decode",
|
||||
"Position-grouped decode · Bitmask O(1) slots"],
|
||||
"astrai/inference/engine.py · scheduler.py")
|
||||
|
||||
L3 = make_box("Prefix Cache + KV Cache", ORANGE,
|
||||
["Radix Tree prefix matching · LRU eviction",
|
||||
"Slot versioning · GPU copy_() zero-copy reuse"],
|
||||
"astrai/inference/scheduler.py")
|
||||
|
||||
L4 = make_box("Transformer Model", PURPLE,
|
||||
["24× DecoderBlock · GQA 6:1 · RoPE",
|
||||
"SwiGLU MLP · Dim 1536 · bfloat16"],
|
||||
"astrai/model/transformer.py")
|
||||
|
||||
layers = VGroup(L1, L2, L3, L4)
|
||||
layers.arrange(DOWN, buff=0.08)
|
||||
layers.move_to([BX, 0, 0])
|
||||
layers.next_to(title, DOWN, buff=0.25)
|
||||
|
||||
# Description panels (right side)
|
||||
descs_text = [
|
||||
["HTTP API Server",
|
||||
"Receives chat requests via",
|
||||
"OpenAI-compatible endpoints.",
|
||||
"Streams generated tokens back",
|
||||
"through Server-Sent Events."],
|
||||
["Inference Engine",
|
||||
"Orchestrates the full generation",
|
||||
"pipeline with a background daemon.",
|
||||
"4-phase loop: Cleanup tasks,",
|
||||
"Refill batch, Prefill prompts,",
|
||||
"Decode tokens one by one."],
|
||||
["Prefix Cache + KV Cache",
|
||||
"Caches key-value states using",
|
||||
"a Radix Tree for O(n) prefix lookup.",
|
||||
"Reuses matched prefixes via GPU",
|
||||
"memcpy — zero recomputation."],
|
||||
["Transformer Model (1B params)",
|
||||
"Decoder-only Transformer with",
|
||||
"Grouped-Query Attention (GQA 6:1).",
|
||||
"RoPE rotary encoding, SwiGLU",
|
||||
"activation, 100K vocabulary."],
|
||||
layers_data = [
|
||||
(0.9, GREEN, "API Layer", ["FastAPI Server • OpenAI-Compatible API"]),
|
||||
(0.9, BLUE, "Inference Engine", ["Streaming • Async • Batch Modes"]),
|
||||
(1.6, YELLOW, "Continuous Batching Scheduler",
|
||||
["Cleanup → Refill → Prefill → Decode",
|
||||
"Position-Grouped Decode",
|
||||
"Bitmask O(1) Slot Allocation"]),
|
||||
(1.2, ORANGE, "Prefix Cache + KV Cache",
|
||||
["Radix Tree • Slot Versioning",
|
||||
"GPU copy_() → Zero-Copy Reuse"]),
|
||||
(1.2, PURPLE, "Transformer Model (1B params)",
|
||||
["24-layer GQA • RoPE • SwiGLU",
|
||||
"bfloat16 • 100K vocab"]),
|
||||
]
|
||||
|
||||
def make_desc(lines, color):
|
||||
els = [Text(lines[0], font_size=20, color=color, weight=BOLD)]
|
||||
for ln in lines[1:]:
|
||||
els.append(Text(ln, font_size=14, color=WHITE))
|
||||
grp = VGroup(*els).arrange(DOWN, buff=0.1, aligned_edge=LEFT)
|
||||
return grp
|
||||
layers = VGroup()
|
||||
for height, color, label, subs in layers_data:
|
||||
box = Rectangle(width=7.5, height=height, color=color, fill_opacity=0.1)
|
||||
lbl = Text(label, font_size=18, color=color)
|
||||
items = [lbl] + [Text(s, font_size=11, color=WHITE) for s in subs]
|
||||
content = VGroup(*items)
|
||||
content.arrange(DOWN, buff=0.22)
|
||||
content.move_to(box.get_center())
|
||||
layers.add(VGroup(box, content))
|
||||
|
||||
COLORS = [GREEN, BLUE, ORANGE, PURPLE]
|
||||
descs = [make_desc(lns, c) for lns, c in zip(descs_text, COLORS)]
|
||||
layers.arrange(DOWN, buff=0.18)
|
||||
layers.next_to(title, DOWN, buff=0.3)
|
||||
|
||||
arrows = VGroup()
|
||||
for i, (layer, desc) in enumerate(zip(layers, descs)):
|
||||
b = layer[0]
|
||||
self.play(Create(layer), run_time=0.35)
|
||||
desc.next_to(b, RIGHT, buff=1.0)
|
||||
desc.align_to(b, UP)
|
||||
self.play(Write(desc), run_time=0.3)
|
||||
self.wait(2.0 if i == 0 else 1.8)
|
||||
if i < len(layers) - 1:
|
||||
self.play(FadeOut(desc))
|
||||
nxt = layers[i + 1][0]
|
||||
for i in range(len(layers)):
|
||||
self.play(Create(layers[i]), run_time=0.35)
|
||||
if i > 0:
|
||||
# Use box-to-box for arrow endpoints (not content)
|
||||
prev_box = layers[i - 1][0]
|
||||
curr_box = layers[i][0]
|
||||
arrow = Arrow(
|
||||
b.get_bottom(), nxt.get_top(),
|
||||
color=GRAY, buff=0.04,
|
||||
max_tip_length_to_length_ratio=0.18,
|
||||
prev_box.get_bottom(),
|
||||
curr_box.get_top(),
|
||||
color=GRAY,
|
||||
buff=0.1,
|
||||
max_tip_length_to_length_ratio=0.15,
|
||||
)
|
||||
self.play(Create(arrow), run_time=0.12)
|
||||
arrows.add(arrow)
|
||||
else:
|
||||
self.play(Create(arrow), run_time=0.15)
|
||||
|
||||
self.wait(0.5)
|
||||
self.play(FadeOut(desc))
|
||||
|
||||
# Show all boxes + arrows together briefly
|
||||
self.wait(0.3)
|
||||
hl = SurroundingRectangle(layers[3], color=GREEN, buff=0.12)
|
||||
hl_note = Text("Zero-Copy Prefix Reuse", font_size=18, color=GREEN)
|
||||
hl_note.next_to(hl, LEFT, buff=0.4)
|
||||
self.play(Create(hl), Write(hl_note))
|
||||
self.wait(1.5)
|
||||
self.play(FadeOut(hl), FadeOut(hl_note))
|
||||
|
||||
# Highlight innovation layers
|
||||
hl2 = SurroundingRectangle(L2, color=BLUE, buff=0.1, stroke_width=2)
|
||||
hl3 = SurroundingRectangle(L3, color=ORANGE, buff=0.1, stroke_width=2)
|
||||
hl_note = Text("Key Innovations", font_size=20, color=GOLD)
|
||||
hl_note.next_to(VGroup(hl2, hl3), RIGHT, buff=1.5)
|
||||
hl_note.align_to(hl2, UP)
|
||||
self.play(Create(hl2), Create(hl3), Write(hl_note))
|
||||
self.wait(2.0)
|
||||
self.play(FadeOut(hl2), FadeOut(hl3), FadeOut(hl_note))
|
||||
|
||||
self.play(FadeOut(VGroup(*layers)), FadeOut(arrows))
|
||||
self.play(FadeOut(layers))
|
||||
|
||||
cta = VGroup(
|
||||
Text("AstrAI", font_size=52, color=BLUE),
|
||||
Text("Single GPU · Open Source · 1B params", font_size=22, color=GRAY),
|
||||
Text("github.com/ViperEkura/AstrAI", font_size=18, color=YELLOW),
|
||||
Text("Single GPU • Open Source • 1B params", font_size=24, color=GRAY),
|
||||
Text("github.com/ViperEkura/AstrAI", font_size=20, color=YELLOW),
|
||||
).arrange(DOWN, buff=0.35)
|
||||
cta.move_to(ORIGIN)
|
||||
self.play(Write(cta))
|
||||
self.wait(2.5)
|
||||
self.wait(2)
|
||||
self.play(FadeOut(cta), FadeOut(title))
|
||||
|
|
|
|||
Loading…
Reference in New Issue