refactor: 清理工厂和配置系统中的死代码与冗余抽象

- 删除 Registry 中未使用的 category/priority 字段，_entries 简化为直接存储类引用
- 修正 __init_subclass__ 避免叶子类（AutoRegressiveLM 等）创建空注册表
- 删除 5 个工厂的薄 create() 覆写，统一使用 BaseFactory.create(name, *args, **kwargs)
- 删除 3 处零调用的 available_types/available_strategies 别名死代码
- 删除零调用的 BaseModelConfig.to_file 死代码
- 将 BaseConfig.from_json/to_json 重命名为 from_file/to_file，消除与子类重复
- 移除两个 inference builder 中总是被覆写的 prompt_tokens=0
2026-06-07 11:39:50 +08:00 · 2026-06-07 11:39:50 +08:00 · 6ae1828449
parent e7b18b7c03
commit 6ae1828449
20 changed files with 31 additions and 114 deletions
--- a/astrai/config/base.py
+++ b/astrai/config/base.py
@ -89,10 +89,10 @@ class BaseConfig:
        raise TypeError

    @classmethod
-    def from_json(cls, path: Union[str, Path]) -> Self:
+    def from_file(cls, path: Union[str, Path]) -> Self:
        with open(path, "r", encoding="utf-8") as f:
            return cls.from_dict(json.load(f))

-    def to_json(self, path: Union[str, Path]):
+    def to_file(self, path: Union[str, Path]):
        with open(path, "w", encoding="utf-8") as f:
            json.dump(self.to_dict(), f, indent=2, ensure_ascii=False)
--- a/astrai/config/model_config.py
+++ b/astrai/config/model_config.py
@ -1,6 +1,5 @@
-import json
 from dataclasses import dataclass
-from typing import Any, Dict, Optional, Self
+from typing import Any, Dict, Optional

 from astrai.config.base import BaseConfig
 from astrai.factory import BaseFactory
@ -22,18 +21,6 @@ class BaseModelConfig(BaseConfig):

    model_type: Optional[str] = None

-    @classmethod
-    def from_file(cls, config_path: str) -> Self:
-        with open(config_path, "r") as f:
-            raw: Dict[str, Any] = json.load(f)
-        return cls.from_dict(raw)
-
-    def to_file(self, config_path: str):
-        d = self.to_dict()
-        config_dict = {k: v for k, v in d.items() if v is not None}
-        with open(config_path, "w") as f:
-            json.dump(config_dict, f, indent=4)
-

@dataclass
@ConfigFactory.register("autoregressive_lm")
--- a/astrai/dataset/dataset.py
+++ b/astrai/dataset/dataset.py
@ -136,20 +136,6 @@ class DatasetFactory(BaseFactory["BaseDataset"]):
        dataset = DatasetFactory.create("custom", window_size, stride)
    """

-    @classmethod
-    def create(cls, train_type: str, window_size: int, stride: int) -> "BaseDataset":
-        """Create a dataset instance.
-
-        Args:
-            train_type: Type of training ("seq", "sft", "dpo", "grpo")
-            window_size: Window size for data sampling
-            stride: Stride between consecutive samples
-
-        Returns:
-            Dataset instance
-        """
-        return super().create(train_type, window_size, stride)
-
    @classmethod
    def load(
        cls,
@ -179,11 +165,6 @@ class DatasetFactory(BaseFactory["BaseDataset"]):

        return dataset

-    @classmethod
-    def available_types(cls) -> list:
-        """Return list of registered dataset type names."""
-        return cls.list_registered()
-

@DatasetFactory.register("seq")
 class SEQDataset(BaseDataset):
--- a/astrai/factory.py
+++ b/astrai/factory.py
@ -8,8 +8,6 @@ from typing import (
    Dict,
    ForwardRef,
    Generic,
-    Optional,
-    Tuple,
    Type,
    TypeVar,
 )
@ -56,21 +54,19 @@ class BaseFactory(ABC, Generic[T]):
    unrelated parameters.
    """

-    _entries: Dict[str, Tuple[Type, Optional[str], int]]
+    _entries: Dict[str, Type[T]]

    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)
-        cls._entries = {}
        for orig_base in getattr(cls, "__orig_bases__", ()):
            if _get_origin(orig_base) is BaseFactory:
                (arg,) = _get_args(orig_base)
+                cls._entries = {}
                cls._component_base = _resolve_type(arg, cls)
                return

    @classmethod
-    def register(
-        cls, name: str, category: Optional[str] = None, priority: int = 0
-    ) -> Callable[[Type[T]], Type[T]]:
+    def register(cls, name: str) -> Callable[[Type[T]], Type[T]]:
        """Decorator to register a component class.

        Validates that the decorated class inherits from the generic
@ -81,7 +77,7 @@ class BaseFactory(ABC, Generic[T]):
            cls._validate_component(component_cls)
            if name in cls._entries:
                raise ValueError(f"Component '{name}' is already registered")
-            cls._entries[name] = (component_cls, category, priority)
+            cls._entries[name] = component_cls
            return component_cls

        return decorator
@ -96,7 +92,7 @@ class BaseFactory(ABC, Generic[T]):
            raise ValueError(
                f"Unknown component: '{name}'. Supported types: {sorted(cls._entries)}"
            )
-        component_cls = entry[0]
+        component_cls = entry
        sig = inspect.signature(component_cls.__init__)
        has_var_kwargs = any(
            p.kind == inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values()
@ -130,7 +126,7 @@ class BaseFactory(ABC, Generic[T]):
            raise ValueError(
                f"Unknown component: '{name}'. Supported types: {sorted(cls._entries)}"
            )
-        return entry[0]
+        return entry

    @classmethod
    def list_registered(cls) -> list:
--- a/astrai/inference/api/anthropic.py
+++ b/astrai/inference/api/anthropic.py
@ -42,7 +42,6 @@ class AnthropicResponseBuilder(ResponseBuilder):
            resp_id=f"msg_{uuid.uuid4().hex[:24]}",
            created=int(time.time()),
            model=request.model,
-            prompt_tokens=0,
        )
        stop_sequences = getattr(request, "stop_sequences", None) or []
        return prompt, ctx, stop_sequences
--- a/astrai/inference/api/openai.py
+++ b/astrai/inference/api/openai.py
@ -86,7 +86,6 @@ class OpenAIResponseBuilder(ResponseBuilder):
            resp_id=self._resp_id,
            created=int(time.time()),
            model=self._model,
-            prompt_tokens=0,
        )
        stop = request.stop
        stop_sequences = (
--- a/astrai/inference/api/protocol.py
+++ b/astrai/inference/api/protocol.py
@ -35,7 +35,7 @@ class GenContext:
    resp_id: str
    created: int
    model: str
-    prompt_tokens: int
+    prompt_tokens: int = 0
    completion_tokens: int = 0


--- a/astrai/model/components/attention.py
+++ b/astrai/model/components/attention.py
@ -24,9 +24,7 @@ def repeat_kv(x: Tensor, n_rep: int) -> Tensor:


 class AttnFactory(BaseFactory[nn.Module]):
-    @classmethod
-    def create(cls, attn_type: str, **kwargs) -> nn.Module:
-        return super().create(attn_type, **kwargs)
+    pass


@AttnFactory.register("gqa")
--- a/astrai/model/components/mlp.py
+++ b/astrai/model/components/mlp.py
@ -8,9 +8,7 @@ from astrai.model.components.linear import Linear


 class FFNFactory(BaseFactory[nn.Module]):
-    @classmethod
-    def create(cls, ffn_type: str, dim: int, dim_ffn: int, **kwargs) -> nn.Module:
-        return super().create(ffn_type, dim, dim_ffn, **kwargs)
+    pass


@FFNFactory.register("mlp")
--- a/astrai/preprocessing/pipeline.py
+++ b/astrai/preprocessing/pipeline.py
@ -44,7 +44,7 @@ class Pipeline:

    Usage::

-        config = PipelineConfig.from_json("sft_pipeline.json")
+        config = PipelineConfig.from_file("sft_pipeline.json")
        Pipeline(config, ["data.jsonl"], output_dir="out", tokenizer_path="params").run()
    """

--- a/astrai/trainer/schedule.py
+++ b/astrai/trainer/schedule.py
@ -31,7 +31,6 @@ class SchedulerFactory(BaseFactory["BaseScheduler"]):
    """Factory class for creating learning rate schedulers.

    Supports decorator-based registration for extensible scheduler types.
-    Also supports creation from ScheduleConfig objects.

    Example usage:
        @SchedulerFactory.register("custom")
@ -41,27 +40,6 @@ class SchedulerFactory(BaseFactory["BaseScheduler"]):
        scheduler = SchedulerFactory.create("custom", optimizer, **kwargs)
    """

-    @classmethod
-    def create(
-        cls, optimizer, schedule_type: str = "none", **kwargs
-    ) -> "BaseScheduler":
-        """Create a scheduler instance by type name.
-
-        Args:
-            optimizer: PyTorch optimizer
-            schedule_type: Type of scheduler ("cosine", "sgdr")
-            **kwargs: Arguments passed to the scheduler constructor
-
-        Returns:
-            Scheduler instance
-        """
-        return super().create(schedule_type, optimizer, **kwargs)
-
-    @classmethod
-    def available_types(cls) -> list:
-        """Return list of registered scheduler type names."""
-        return cls.list_registered()
-

 # ----------- Scheduler implementations -----------

--- a/astrai/trainer/strategy.py
+++ b/astrai/trainer/strategy.py
@ -127,26 +127,6 @@ class StrategyFactory(BaseFactory["BaseStrategy"]):
        strategy = StrategyFactory.create("custom", model, device)
    """

-    @classmethod
-    def create(cls, train_type: str, model, device: str, **kwargs) -> "BaseStrategy":
-        """Create a strategy instance based on training type.
-
-        Args:
-            train_type: Type of training ("seq", "sft", "dpo", "grpo")
-            model: Model instance for the strategy
-            device: Device to run the strategy on
-            **kwargs: Additional arguments passed to strategy constructor
-
-        Returns:
-            Strategy instance
-        """
-        return super().create(train_type, model, device, **kwargs)
-
-    @classmethod
-    def available_strategies(cls) -> list:
-        """Return list of registered strategy names."""
-        return cls.list_registered()
-

 # ============== Strategy Classes ==============
 # All strategies are registered at class definition time using the decorator
--- a/astrai/trainer/train_context.py
+++ b/astrai/trainer/train_context.py
@ -172,8 +172,8 @@ class TrainContextBuilder:
                        obj.load_state_dict(extra[name])

        context.strategy = StrategyFactory.create(
+            cfg.strategy,
            model=context.model,
-            train_type=cfg.strategy,
            device=device,
            executor=executor,
            model_fn=cfg.model_fn,
--- a/scripts/tools/preprocess.py
+++ b/scripts/tools/preprocess.py
@ -24,7 +24,7 @@ def main():
    )
    args = parser.parse_args()

-    config = PipelineConfig.from_json(args.config)
+    config = PipelineConfig.from_file(args.config)

    Pipeline(
        config=config,
--- a/scripts/tools/train.py
+++ b/scripts/tools/train.py
@ -231,7 +231,8 @@ def create_optimizer(model, **kwargs) -> optim.Optimizer:
 def create_scheduler(
    optimizer: optim.Optimizer, **kwargs
 ) -> optim.lr_scheduler.LRScheduler:
-    return SchedulerFactory.create(optimizer, **kwargs)
+    schedule_type = kwargs.pop("schedule_type")
+    return SchedulerFactory.create(schedule_type, optimizer, **kwargs)


 def compute_total_steps(
--- a/tests/data/test_preprocess_config.py
+++ b/tests/data/test_preprocess_config.py
@ -53,15 +53,15 @@ def test_to_dict_roundtrip():
    assert config2.mask == {"prompt": "mask", "response": "train"}


-def test_to_json_from_json(temp_dir):
+def test_to_file_from_file(temp_dir):
    config = PipelineConfig(
        input=InputConfig(sections=_TEXT_SECTIONS),
        mask={"text": "train"},
        mask_default="mask",
    )
    path = os.path.join(temp_dir, "config.json")
-    config.to_json(path)
-    loaded = PipelineConfig.from_json(path)
+    config.to_file(path)
+    loaded = PipelineConfig.from_file(path)
    assert loaded.input.sections == _TEXT_SECTIONS
    assert loaded.mask == {"text": "train"}

@ -69,8 +69,8 @@ def test_to_json_from_json(temp_dir):
 def test_dpo_config_roundtrip(temp_dir):
    config = make_dpo_chat_config()
    path = os.path.join(temp_dir, "config.json")
-    config.to_json(path)
-    loaded = PipelineConfig.from_json(path)
+    config.to_file(path)
+    loaded = PipelineConfig.from_file(path)
    assert loaded.input.sources is not None
    assert "chosen" in loaded.input.sources
    assert "rejected" in loaded.input.sources
--- a/tests/trainer/conftest.py
+++ b/tests/trainer/conftest.py
@ -65,7 +65,7 @@ def create_train_config(

    def scheduler_fn(optim):
        return SchedulerFactory.create(
-            optim, "cosine", warmup_steps=10, lr_decay_steps=10, min_rate=0.05
+            "cosine", optim, warmup_steps=10, lr_decay_steps=10, min_rate=0.05
        )

    return TrainConfig(
--- a/tests/trainer/test_callbacks.py
+++ b/tests/trainer/test_callbacks.py
@ -102,7 +102,7 @@ def test_gradient_checkpointing_trainer_integration(base_test_env, random_datase

    def scheduler_fn(optim):
        return SchedulerFactory.create(
-            optim, "cosine", warmup_steps=10, lr_decay_steps=10, min_rate=0.05
+            "cosine", optim, warmup_steps=10, lr_decay_steps=10, min_rate=0.05
        )

    train_config = TrainConfig(
@ -136,7 +136,7 @@ def test_callback_integration(base_test_env, random_dataset):

    def scheduler_fn(optim):
        return SchedulerFactory.create(
-            optim, "cosine", warmup_steps=10, lr_decay_steps=10, min_rate=0.05
+            "cosine", optim, warmup_steps=10, lr_decay_steps=10, min_rate=0.05
        )

    train_config = TrainConfig(
--- a/tests/trainer/test_early_stopping.py
+++ b/tests/trainer/test_early_stopping.py
@ -16,7 +16,7 @@ def test_early_stopping_simulation(base_test_env, early_stopping_dataset):

    def scheduler_fn(optim):
        return SchedulerFactory.create(
-            optim, "cosine", warmup_steps=10, lr_decay_steps=10, min_rate=0.05
+            "cosine", optim, warmup_steps=10, lr_decay_steps=10, min_rate=0.05
        )

    train_config = TrainConfig(
--- a/tests/trainer/test_train_strategy.py
+++ b/tests/trainer/test_train_strategy.py
@ -36,8 +36,8 @@ def test_schedule_factory_random_configs():
                min_rate = params["min_rate"]
                lr_decay_steps = total_steps - warmup_steps
                scheduler = SchedulerFactory.create(
-                    optimizer,
                    schedule_type,
+                    optimizer,
                    warmup_steps=warmup_steps,
                    lr_decay_steps=lr_decay_steps,
                    min_rate=min_rate,
@ -52,8 +52,8 @@ def test_schedule_factory_random_configs():
                t_mult = params["t_mult"]
                min_rate = params["min_rate"]
                scheduler = SchedulerFactory.create(
-                    optimizer,
                    schedule_type,
+                    optimizer,
                    warmup_steps=warmup_steps,
                    cycle_length=cycle_length,
                    t_mult=t_mult,
@ -103,8 +103,8 @@ def test_schedule_factory_edge_cases():
        min_rate = params["min_rate"]
        lr_decay_steps = total_steps - warmup_steps
        scheduler = SchedulerFactory.create(
-            optimizer,
            "cosine",
+            optimizer,
            warmup_steps=warmup_steps,
            lr_decay_steps=lr_decay_steps,
            min_rate=min_rate,
@ -129,8 +129,8 @@ def test_schedule_factory_state_persistence():
    min_rate = 0.1
    lr_decay_steps = total_steps - warmup_steps
    scheduler = SchedulerFactory.create(
-        optimizer,
        "cosine",
+        optimizer,
        warmup_steps=warmup_steps,
        lr_decay_steps=lr_decay_steps,
        min_rate=min_rate,
@ -146,8 +146,8 @@ def test_schedule_factory_state_persistence():

    # Create new scheduler with same parameters
    new_scheduler = SchedulerFactory.create(
-        optimizer,
        "cosine",
+        optimizer,
        warmup_steps=warmup_steps,
        lr_decay_steps=lr_decay_steps,
        min_rate=min_rate,