Make sure to register the base ChatML template even if no system message is provided (#1207)
Browse files- .github/workflows/tests.yml +4 -0
- src/axolotl/cli/preprocess.py +2 -0
- src/axolotl/cli/train.py +3 -0
- src/axolotl/utils/data.py +2 -1
.github/workflows/tests.yml
CHANGED
|
@@ -106,3 +106,7 @@ jobs:
|
|
| 106 |
- name: GPU Unit Tests monkeypatched w docker image
|
| 107 |
run: |
|
| 108 |
docker run --privileged --gpus "all" --env WANDB_DISABLED=true --rm ${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }} pytest /workspace/axolotl/tests/e2e/patched/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
- name: GPU Unit Tests monkeypatched w docker image
|
| 107 |
run: |
|
| 108 |
docker run --privileged --gpus "all" --env WANDB_DISABLED=true --rm ${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }} pytest /workspace/axolotl/tests/e2e/patched/
|
| 109 |
+
- name: Prune image from docker
|
| 110 |
+
if: github.ref != 'refs/heads/main'
|
| 111 |
+
run: |
|
| 112 |
+
docker rmi -f ${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}
|
src/axolotl/cli/preprocess.py
CHANGED
|
@@ -40,6 +40,8 @@ def do_cli(config: Path = Path("examples/"), **kwargs):
|
|
| 40 |
f"ChatML set. Adding default system message: {parsed_cfg.default_system_message}"
|
| 41 |
)
|
| 42 |
register_chatml_template(parsed_cfg.default_system_message)
|
|
|
|
|
|
|
| 43 |
|
| 44 |
if not parsed_cfg.dataset_prepared_path:
|
| 45 |
msg = (
|
|
|
|
| 40 |
f"ChatML set. Adding default system message: {parsed_cfg.default_system_message}"
|
| 41 |
)
|
| 42 |
register_chatml_template(parsed_cfg.default_system_message)
|
| 43 |
+
else:
|
| 44 |
+
register_chatml_template()
|
| 45 |
|
| 46 |
if not parsed_cfg.dataset_prepared_path:
|
| 47 |
msg = (
|
src/axolotl/cli/train.py
CHANGED
|
@@ -43,7 +43,10 @@ def do_train(cfg, cli_args) -> Tuple[PreTrainedModel, PreTrainedTokenizer]:
|
|
| 43 |
f"ChatML set. Adding default system message: {cfg.default_system_message}"
|
| 44 |
)
|
| 45 |
register_chatml_template(cfg.default_system_message)
|
|
|
|
|
|
|
| 46 |
|
|
|
|
| 47 |
dataset_meta = load_rl_datasets(cfg=cfg, cli_args=cli_args)
|
| 48 |
else:
|
| 49 |
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
|
|
|
| 43 |
f"ChatML set. Adding default system message: {cfg.default_system_message}"
|
| 44 |
)
|
| 45 |
register_chatml_template(cfg.default_system_message)
|
| 46 |
+
else:
|
| 47 |
+
register_chatml_template()
|
| 48 |
|
| 49 |
+
if cfg.rl:
|
| 50 |
dataset_meta = load_rl_datasets(cfg=cfg, cli_args=cli_args)
|
| 51 |
else:
|
| 52 |
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
src/axolotl/utils/data.py
CHANGED
|
@@ -16,6 +16,7 @@ from datasets import (
|
|
| 16 |
load_from_disk,
|
| 17 |
)
|
| 18 |
from huggingface_hub import hf_hub_download
|
|
|
|
| 19 |
from torch.utils.data import RandomSampler
|
| 20 |
from transformers import PreTrainedTokenizerBase
|
| 21 |
|
|
@@ -213,7 +214,7 @@ def load_tokenized_prepared_datasets(
|
|
| 213 |
token=use_auth_token,
|
| 214 |
)
|
| 215 |
ds_from_hub = True
|
| 216 |
-
except (FileNotFoundError, ConnectionError):
|
| 217 |
pass
|
| 218 |
|
| 219 |
ds_from_cloud = False
|
|
|
|
| 16 |
load_from_disk,
|
| 17 |
)
|
| 18 |
from huggingface_hub import hf_hub_download
|
| 19 |
+
from huggingface_hub.utils import HFValidationError
|
| 20 |
from torch.utils.data import RandomSampler
|
| 21 |
from transformers import PreTrainedTokenizerBase
|
| 22 |
|
|
|
|
| 214 |
token=use_auth_token,
|
| 215 |
)
|
| 216 |
ds_from_hub = True
|
| 217 |
+
except (FileNotFoundError, ConnectionError, HFValidationError):
|
| 218 |
pass
|
| 219 |
|
| 220 |
ds_from_cloud = False
|