from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional


@dataclass
class EvalResult:
    """Container for the outcome of one evaluation run.

    Attributes:
        task_name: Name of the evaluated task.
        num_samples: Sample count reported for the run.
        accuracy: Accuracy value reported for the run.
        results: Per-item result records; a fresh empty list by default.
        metadata: Free-form extra information; a fresh empty dict by default.
    """

    task_name: str
    num_samples: int
    accuracy: float
    # default_factory gives every instance its own container instead of a
    # single shared mutable default.
    results: List[Dict[str, Any]] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)


class BaseEvaluator(ABC):
    """Abstract base for evaluators configured with an API endpoint.

    Concrete subclasses must implement both ``evaluate`` and ``load_data``;
    until they do, the class cannot be instantiated.
    """

    def __init__(self, api_base: str, api_key: str = "not-needed", **kwargs):
        """Store the endpoint settings.

        Args:
            api_base: Base URL of the API; any trailing ``/`` characters are
                stripped before it is stored.
            api_key: API key, stored unchanged.
            **kwargs: Accepted but not used by this base initializer.
        """
        normalized_base = api_base.rstrip("/")
        self.api_key = api_key
        self.api_base = normalized_base

    @abstractmethod
    def evaluate(self, data_path: str) -> EvalResult:
        """Run the evaluation for ``data_path`` and return an ``EvalResult``."""

    @abstractmethod
    def load_data(self, data_path: str) -> List[Dict[str, Any]]:
        """Load the records at ``data_path`` as a list of dictionaries."""