llmEval/llm_eval/base.py

25 lines
687 B
Python

from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
@dataclass
class EvalResult:
    """Container for the outcome of one evaluation run.

    Attributes:
        task_name: Name identifying the evaluated task.
        num_samples: Number of samples reported for the run.
        accuracy: Accuracy score reported for the run.
            NOTE(review): the scale (fraction vs. percentage) is not
            established in this file -- confirm against the evaluators.
        results: List of dict records attached to the run; presumably one
            per evaluated sample (verify against the evaluators). Defaults
            to a fresh empty list for every instance.
        metadata: Free-form extra information about the run. Defaults to a
            fresh empty dict for every instance.
    """

    task_name: str
    num_samples: int
    accuracy: float
    # default_factory gives each instance its own container instead of a
    # single mutable default shared across instances.
    results: List[Dict[str, Any]] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)
class BaseEvaluator(ABC):
    """Abstract base class for evaluators.

    Stores the API connection settings and declares the two methods every
    concrete evaluator must implement: ``load_data`` and ``evaluate``.
    Instantiating this class directly (or a subclass that leaves either
    abstract method unimplemented) raises ``TypeError``.
    """

    def __init__(self, api_base: str, api_key: str = "not-needed", **kwargs):
        """Store the API connection settings.

        Args:
            api_base: Base URL of the API. Trailing ``/`` characters are
                stripped before it is stored.
            api_key: API key; defaults to the placeholder ``"not-needed"``.
            **kwargs: Accepted so subclasses/callers can pass extra options;
                this base class does not use them.
        """
        # Drop trailing slashes so the stored base has a single canonical form.
        self.api_base = api_base.rstrip("/")
        self.api_key = api_key

    @abstractmethod
    def evaluate(self, data_path: str) -> EvalResult:
        """Run the evaluation for the data at ``data_path``.

        Args:
            data_path: Location of the evaluation data.

        Returns:
            An ``EvalResult`` describing the run.
        """

    @abstractmethod
    def load_data(self, data_path: str) -> List[Dict[str, Any]]:
        """Load the evaluation data found at ``data_path``.

        Args:
            data_path: Location of the evaluation data.

        Returns:
            A list of dict records.
        """