25 lines
687 B
Python
25 lines
687 B
Python
from abc import ABC, abstractmethod
|
|
from dataclasses import dataclass, field
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
|
|
@dataclass
class EvalResult:
    """Container for the outcome of a single evaluation run.

    Attributes:
        task_name: Name identifying the evaluated task.
        num_samples: Sample count recorded for the run.
        accuracy: Accuracy score for the run. The scale (fraction vs.
            percentage) is decided by whoever builds the result; nothing
            here validates it.
        results: List of dict records attached to the run. Each instance
            gets its own new empty list when none is supplied. The record
            schema is not fixed by this class.
        metadata: Free-form dict of extra information. Each instance gets
            its own new empty dict when none is supplied.
    """

    # Required fields, in positional order.
    task_name: str
    num_samples: int
    accuracy: float

    # Optional fields; factories keep the mutable defaults per-instance.
    results: List[Dict[str, Any]] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)
|
|
|
|
|
|
class BaseEvaluator(ABC):
    """Abstract base for evaluators that are configured with an API endpoint.

    Concrete subclasses must implement both ``evaluate`` and ``load_data``;
    the class cannot be instantiated until they do.
    """

    def __init__(self, api_base: str, api_key: str = "not-needed", **kwargs):
        """Store the endpoint settings on the instance.

        Args:
            api_base: Base URL of the API. Any trailing ``/`` characters are
                stripped before it is stored.
            api_key: Key stored as-is on the instance; defaults to the
                literal string ``"not-needed"``.
            **kwargs: Accepted but not used by this initializer.
        """
        # Normalise once so later code does not have to worry about
        # trailing slashes on the stored base URL.
        normalized_base = api_base.rstrip("/")
        self.api_base = normalized_base
        self.api_key = api_key

    @abstractmethod
    def evaluate(self, data_path: str) -> EvalResult:
        """Run the evaluation for ``data_path`` and return an ``EvalResult``.

        Must be overridden by subclasses.
        """

    @abstractmethod
    def load_data(self, data_path: str) -> List[Dict[str, Any]]:
        """Load the records found at ``data_path`` as a list of dicts.

        Must be overridden by subclasses.
        """
|