# AstrAI/astrai/inference/__init__.py

"""Inference module for continuous batching.

Layers:
  - core/:           Core inference loop (cache, executor, scheduler, task)
  - api/:            HTTP protocol handlers (OpenAI, Anthropic)
  - engine.py:       Facade (InferenceEngine), Value Object (GenerationRequest)
  - sample.py:       Strategy pattern (TemperatureStrategy, TopKStrategy, TopPStrategy)
"""

from astrai.inference.api import (
    AnthropicHandler,
    AnthropicMessage,
    ChatCompletionRequest,
    ChatMessage,
    MessagesRequest,
    OpenAIHandler,
    ProtocolHandler,
    StopChecker,
    StreamContext,
    app,
    run_server,
)
from astrai.inference.core import (
    STOP,
    Allocator,
    Executor,
    InferenceScheduler,
    KVCache,
    KvcacheView,
    PagePool,
    PrefixCache,
    Storage,
    Task,
    TaskManager,
    TaskStatus,
    TaskTable,
    page_hash,
)
from astrai.inference.engine import (
    GenerationRequest,
    InferenceEngine,
)
from astrai.inference.sample import (
    BaseSamplingStrategy,
    SamplingPipeline,
    TemperatureStrategy,
    TopKStrategy,
    TopPStrategy,
    sample,
)

# Public API of this package: the names exported by
# `from astrai.inference import *`. Every entry is imported at the top of
# this module; the list is kept as a plain literal, grouped by area.
__all__ = [
    # Engine / Requests (from astrai.inference.engine)
    "InferenceEngine",
    "GenerationRequest",
    # Core scheduler (from astrai.inference.core)
    "InferenceScheduler",
    "Executor",
    "STOP",
    "Task",
    "TaskManager",
    "TaskStatus",
    # Core cache (from astrai.inference.core)
    "Allocator",
    "KVCache",
    "KvcacheView",
    "PagePool",
    "PrefixCache",
    "Storage",
    "TaskTable",
    "page_hash",
    # Sampling, Strategy pattern (from astrai.inference.sample)
    "sample",
    "BaseSamplingStrategy",
    "TemperatureStrategy",
    "TopKStrategy",
    "TopPStrategy",
    "SamplingPipeline",
    # Protocol handlers and streaming helpers (from astrai.inference.api)
    "ProtocolHandler",
    "StopChecker",
    "StreamContext",
    "AnthropicHandler",
    "OpenAIHandler",
    # Server: request/message types, app object and launcher
    # (from astrai.inference.api)
    "ChatMessage",
    "ChatCompletionRequest",
    "AnthropicMessage",
    "MessagesRequest",
    "app",
    "run_server",
]