# Source: Hugging Face Hub repository cren/weclone, file train/train_sft.py
# Commit 88aba71 (verified): "Upload folder using huggingface_hub"
import os
import sys
import json
from llamafactory.train.tuner import run_exp
from llamafactory.extras.misc import get_current_device
from weclone.utils.config import load_config
from weclone.utils.log import logger
from weclone.data.clean.strategies import LLMCleaningStrategy
def main():
    """Prepare the dataset and launch the ``train_sft`` run via ``run_exp``.

    Steps, in order:

    1. Load the ``train_sft`` and ``make_dataset`` configuration sections.
    2. Log a warning when the current device is a CPU.
    3. Run ``LLMCleaningStrategy.clean()`` and require that the path it
       returns exists on disk.
    4. Log the training configuration as JSON and hand it to ``run_exp``.

    Raises:
        SystemExit: With status 1 when the cleaned data file does not exist.
    """
    train_config = load_config(arg_type="train_sft")
    dataset_config = load_config(arg_type="make_dataset")

    device = get_current_device()
    # NOTE: LLaMA-Factory's get_current_device() returns a torch.device, and a
    # torch.device does not compare equal to a plain string, so the original
    # `device == "cpu"` check could never be true. Compare the device type
    # instead; the getattr fallback keeps a plain-string return value working.
    if getattr(device, "type", device) == "cpu":
        logger.warning("请注意你正在使用CPU训练,非Mac设备可能会出现问题")

    cleaner = LLMCleaningStrategy(make_dataset_config=dataset_config)
    cleaned_data_path = cleaner.clean()

    # Abort before training starts if the cleaning step produced no file.
    if not os.path.exists(cleaned_data_path):
        logger.error(f"错误:文件 '{cleaned_data_path}' 不存在,请确保数据处理步骤已正确生成该文件。")
        sys.exit(1)

    # ensure_ascii=False keeps non-ASCII config values readable in the log.
    formatted_config = json.dumps(train_config, indent=4, ensure_ascii=False)
    logger.info(f"微调配置:\n{formatted_config}")

    run_exp(train_config)
# Run the training pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()