diff --git a/astrai/model/encoder.py b/astrai/model/encoder.py index 33a5518..2957be7 100644 --- a/astrai/model/encoder.py +++ b/astrai/model/encoder.py @@ -68,9 +68,6 @@ class EmbeddingEncoder(AutoModel): x = self.embed_tokens(input_ids) - if position_ids is None: - position_ids = torch.arange(S, device=x.device).unsqueeze(0).expand(B, -1) - rotary_emb = self.rotary_embedding(x, position_ids) attn_mask = process_attention_mask(x, position_ids, input_mask, is_causal=False) diff --git a/astrai/model/transformer.py b/astrai/model/transformer.py index b6bdcda..bf68f21 100644 --- a/astrai/model/transformer.py +++ b/astrai/model/transformer.py @@ -1,4 +1,4 @@ -from typing import Any, Mapping, Optional +from typing import Any, Dict, Mapping, Optional import torch import torch.nn as nn @@ -136,7 +136,7 @@ class AutoRegressiveLM(AutoModel): input_mask: Optional[Tensor] = None, paged_cache: Optional[KvcacheView] = None, position_ids: Optional[Tensor] = None, - ) -> Tensor: + ) -> Dict[str, Tensor]: assert input_ids.ndim == 2 x = self.embed_tokens(input_ids) diff --git a/scripts/tools/evaluate_mmlu.py b/scripts/tools/evaluate_mmlu.py index 8bbef5b..fb9321b 100644 --- a/scripts/tools/evaluate_mmlu.py +++ b/scripts/tools/evaluate_mmlu.py @@ -197,7 +197,7 @@ def evaluate_subject( def main(): parser = argparse.ArgumentParser(description="MMLU evaluation") parser.add_argument( - "--model_dir", type=str, default="./params", help="Model directory" + "--param_path", type=str, default="./params", help="Model directory" ) parser.add_argument( "--data_dir", type=str, default="./mmlu_data", help="MMLU data directory" @@ -228,8 +228,8 @@ def main(): if args.download or not os.path.exists(args.data_dir): download_mmlu(args.data_dir) - model = AutoModel.from_pretrained(args.model_dir) - tokenizer = AutoTokenizer.from_pretrained(args.model_dir) + model = AutoModel.from_pretrained(args.param_path) + tokenizer = AutoTokenizer.from_pretrained(args.param_path) device = args.device dtype = getattr(torch, args.dtype) model.to(device=device, dtype=dtype) diff --git a/scripts/tools/perplexity.py b/scripts/tools/perplexity.py index 84b2640..970440f 100644 --- a/scripts/tools/perplexity.py +++ b/scripts/tools/perplexity.py @@ -10,11 +10,11 @@ from astrai.tokenize import AutoTokenizer def process_file( - model_dir: str, input_file: str, output_file: str, batch_size: int, text_key: str + param_path: str, input_file: str, output_file: str, batch_size: int, text_key: str ): # Load model and tokenizer - model = AutoModel.from_pretrained(model_dir) - tokenizer = AutoTokenizer.from_pretrained(model_dir) + model = AutoModel.from_pretrained(param_path) + tokenizer = AutoTokenizer.from_pretrained(param_path) model.to(device="cuda", dtype=torch.bfloat16) with open(input_file, "r", encoding="utf-8") as f: @@ -88,7 +88,7 @@ def process_file( if __name__ == "__main__": parser = argparse.ArgumentParser(description="Run perplexity with a Khaosz model.") parser.add_argument( - "--model_dir", type=str, required=True, help="Path to the model directory." + "--param_path", type=str, required=True, help="Path to the model directory." ) parser.add_argument( "--input_file", type=str, required=True, help="Path to the input file." diff --git a/scripts/tools/server.py b/scripts/tools/server.py index 8f06e4d..68f0cde 100644 --- a/scripts/tools/server.py +++ b/scripts/tools/server.py @@ -18,7 +18,7 @@ def main(): "--reload", action="store_true", help="Enable auto-reload for development" ) parser.add_argument( - "--param-path", + "--param_path", type=Path, default=None, help="Path to model parameters (default: project_root/params)",