| | from pathlib import Path |
| | import torch |
| |
|
| | |
# --- Filesystem layout -------------------------------------------------------
# Paths are built with pathlib `/` joins instead of raw Windows backslash
# strings (e.g. Path(r"data\...")), so the same config works on both Windows
# and POSIX systems. On POSIX a backslash is a literal filename character,
# which made the previous form silently wrong outside Windows.

# Root directory of the IWSLT'15 English-Vietnamese parallel corpus.
DATA_PATH = Path("data") / "IWSLT-15-en-vi"

# Serialized tokenizer file (32k vocab, matching VOCAB_SIZE below);
# presumably a HuggingFace `tokenizers` JSON file — confirm against the
# tokenizer-training script.
TOKENIZER_NAME = "iwslt_en-vi_tokenizer_32k.json"
TOKENIZER_PATH = Path("artifacts") / "tokenizers" / TOKENIZER_NAME

# Directory holding exported model weight files.
MODEL_DIR = Path("artifacts") / "models"

# Weight file name used for the checkpoint path below.
MODEL_NAME = "transformer_en_vi_iwslt_1.safetensors"

# Destination for weights trained on Kaggle (intentionally distinct from
# MODEL_NAME).
MODEL_SAVE_PATH = MODEL_DIR / "transformer_en_vi_iwslt_kaggle_1.safetensors"

# Mid-training checkpoint location for MODEL_NAME.
CHECKPOINT_PATH = Path("artifacts") / "checkpoints" / MODEL_NAME

# Cache directory; empty string means "library default" — TODO confirm how
# downstream code interprets "".
CACHE_DIR = ""
| |
|
| |
|
| | |
# --- Runtime / tokenization settings -----------------------------------------

# Compute device: prefer CUDA when available, otherwise fall back to CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Number of DataLoader worker processes.
NUM_WORKERS: int = 4

# Target vocabulary size for the tokenizer (matches the "32k" in TOKENIZER_NAME).
VOCAB_SIZE: int = 32_000

# Special tokens, in id order: list index == token id.
SPECIAL_TOKENS: list[str] = ["[PAD]", "[UNK]", "[SOS]", "[EOS]"]

# Cap on the number of samples to load; NOTE(review): presumably used to
# subsample the corpus for quick experiments — confirm against the data loader.
NUM_SAMPLES_TO_USE: int = 1000

# Token ids derived from SPECIAL_TOKENS so the two definitions can never
# drift apart (previously hard-coded as 0..3, which would silently break if
# the list order changed). Values are unchanged: 0, 1, 2, 3.
PAD_TOKEN_ID: int = SPECIAL_TOKENS.index("[PAD]")
UNK_TOKEN_ID: int = SPECIAL_TOKENS.index("[UNK]")
SOS_TOKEN_ID: int = SPECIAL_TOKENS.index("[SOS]")
EOS_TOKEN_ID: int = SPECIAL_TOKENS.index("[EOS]")
| |
|
| |
|
| | |
| | |
# --- Transformer architecture hyperparameters --------------------------------
# Values match the "base" configuration from "Attention Is All You Need"
# (d_model=512, 6 layers, 8 heads, d_ff=2048, dropout=0.1).
D_MODEL: int = 512       # embedding / hidden dimension
N_LAYERS: int = 6        # number of encoder and decoder layers
N_HEADS: int = 8         # attention heads (D_MODEL must be divisible by this)

D_FF: int = 2048         # feed-forward inner dimension
DROPOUT: float = 0.1     # dropout probability
MAX_SEQ_LEN: int = 150   # maximum sequence length in tokens

# --- Training hyperparameters ------------------------------------------------
LEARNING_RATE: float = 5e-4
BATCH_SIZE: int = 32
EPOCHS: int = 5

# --- Hugging Face Hub coordinates for downloading pretrained weights ---------
# NOTE(review): FILENAME duplicates the MODEL_SAVE_PATH file name above —
# keep them in sync if either changes.
REPO_ID: str = "AlainDeLong/transformer-en-vi-base"
FILENAME: str = "transformer_en_vi_iwslt_kaggle_1.safetensors"
| |
|
if __name__ == "__main__":
    # Smoke test when run as a script: report which compute device was picked.
    device_msg = f"Using device: {DEVICE}"
    print(device_msg)
| |
|