diff --git a/README.md b/README.md index 0589828..5aa72c7 100644 --- a/README.md +++ b/README.md @@ -90,8 +90,8 @@ nohup python scripts/tools/train.py \ --warmup_ratio=0.05 \ --max_lr=1e-4 \ --max_grad_norm=1.0 \ - --adamw_beta1=0.95 \ - --adamw_beta2=0.99 \ + --adamw_beta1=0.9 \ + --adamw_beta2=0.95 \ --adamw_weight_decay=0.01 \ --window_size=2048 \ --ckpt_interval=10000 \ diff --git a/assets/docs/README-zh-CN.md b/assets/docs/README-zh-CN.md index 41d1743..e30e4c4 100644 --- a/assets/docs/README-zh-CN.md +++ b/assets/docs/README-zh-CN.md @@ -96,8 +96,8 @@ nohup python scripts/tools/train.py \ --warmup_ratio=0.05 \ --max_lr=1e-4 \ --max_grad_norm=1.0 \ - --adamw_beta1=0.95 \ - --adamw_beta2=0.99 \ + --adamw_beta1=0.9 \ + --adamw_beta2=0.95 \ --adamw_weight_decay=0.01 \ --window_size=2048 \ --ckpt_interval=10000 \ diff --git a/assets/docs/params.md b/assets/docs/params.md index ccef336..ae86e39 100644 --- a/assets/docs/params.md +++ b/assets/docs/params.md @@ -25,8 +25,8 @@ | Parameter | Description | Default | |-----------|-------------|---------| -| `--adamw_beta1` | AdamW beta1 | 0.95 | -| `--adamw_beta2` | AdamW beta2 | 0.99 | +| `--adamw_beta1` | AdamW beta1 | 0.9 | +| `--adamw_beta2` | AdamW beta2 | 0.95 | | `--adamw_weight_decay` | AdamW weight decay | 0.01 | ### Data Loading @@ -81,8 +81,8 @@ nohup python scripts/tools/train.py \ --warmup_ratio=0.05 \ --max_lr=1e-4 \ --max_grad_norm=1.0 \ - --adamw_beta1=0.95 \ - --adamw_beta2=0.99 \ + --adamw_beta1=0.9 \ + --adamw_beta2=0.95 \ --adamw_weight_decay=0.01 \ --window_size=2048 \ --ckpt_interval=10000 \ diff --git a/assets/docs/training.md b/assets/docs/training.md index a97f485..0fde6e5 100644 --- a/assets/docs/training.md +++ b/assets/docs/training.md @@ -196,8 +196,8 @@ nohup python scripts/tools/train.py \ --warmup_ratio=0.05 \ --max_lr=1e-4 \ --max_grad_norm=1.0 \ - --adamw_beta1=0.95 \ - --adamw_beta2=0.99 \ + --adamw_beta1=0.9 \ + --adamw_beta2=0.95 \ --adamw_weight_decay=0.01 \ --window_size=2048 \ --ckpt_interval=10000 \ diff --git a/scripts/tools/train.py b/scripts/tools/train.py index 1d7c72b..e9cd8df 100644 --- a/scripts/tools/train.py +++ b/scripts/tools/train.py @@ -69,14 +69,14 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--adamw_beta1", type=float, - default=0.95, - help="Beta values for AdamW optimizer.", + default=0.9, + help="Beta1 for AdamW optimizer.", ) parser.add_argument( "--adamw_beta2", type=float, - default=0.99, - help="Beta values for AdamW optimizer.", + default=0.95, + help="Beta2 for AdamW optimizer.", ) parser.add_argument( "--adamw_weight_decay",