Spaces:

jljiu
/

nekoa

Sleeping

App Files Community

jljiu committed on Dec 27, 2024

Commit

a903621

verified ·

1 Parent(s): 26da30c

Update train.py

Browse files

Files changed (1) hide show

train.py +18 -109

train.py CHANGED Viewed

@@ -12,118 +12,27 @@ class ModelTrainer:
         self.model_id = model_id
         # 加载系统提示词
-        try:
-            with open(system_prompts_path, 'r', encoding='utf-8') as f:
-                self.system_prompts = json.load(f)
-        except Exception as e:
-            print(f"加载系统提示词失败: {e}")
-            self.system_prompts = {"base_prompt": "默认系统提示词"}
-        # 首先尝试检测可用资源
-        self.device = self._detect_device()
-        self.dtype = self._detect_optimal_dtype()
-        # 初始化模型和分词器
-        self._initialize_model_and_tokenizer()
-    def _detect_device(self):
-        """检测并返回最优设备配置"""
-        try:
-            if torch.cuda.is_available():
-                print("检测到 CUDA 设备")
-                return "cuda"
-            elif torch.backends.mps.is_available():
-                print("检测到 MPS 设备")
-                return "mps"
-            else:
-                print("使用 CPU 设备")
-                return "cpu"
-        except:
-            print("设备检测失败，默认使用 CPU")
-            return "cpu"
-    def _detect_optimal_dtype(self):
-        """检测并返回最优数据类型"""
-        try:
-            if self.device == "cuda":
-                if torch.cuda.get_device_capability()[0] >= 7:
-                    print("使用 float16 精度")
-                    return torch.float16
-            print("使用 float32 精度")
-            return torch.float32
-        except:
-            print("数据类型检测失败，默认使用 float32")
-            return torch.float32
-    def _initialize_model_and_tokenizer(self):
-        """初始化模型和分词器，包含多个备选方案"""
-        print(f"开始初始化模型，使用设备: {self.device}，数据类型: {self.dtype}")
-        # 首先尝试加载分词器
-        try:
-            self.tokenizer = AutoTokenizer.from_pretrained(
-                self.model_id,
-                trust_remote_code=True
-            )
-        except Exception as e:
-            print(f"分词器加载失败: {e}")
-            raise RuntimeError("分词器初始化失败")
-        # 尝试不同的模型加载配置
-        loading_configs = [
-            # 配置1：标准加载
-            {
-                "trust_remote_code": True,
-                "torch_dtype": self.dtype,
-                "device_map": "auto",
-                "low_cpu_mem_usage": True
-            },
-            # 配置2：8bit量化加载
-            {
-                "trust_remote_code": True,
-                "load_in_8bit": True,
-                "device_map": "auto",
-            },
-            # 配置3：CPU加载
-            {
-                "trust_remote_code": True,
-                "torch_dtype": torch.float32,
-                "device_map": "cpu",
-                "low_cpu_mem_usage": True
-            }
-        ]
-        last_exception = None
-        for config in loading_configs:
-            try:
-                print(f"尝试加载模型，配置: {config}")
-                self.model = AutoModelForCausalLM.from_pretrained(
-                    self.model_id,
-                    **config
-                )
-                print("模型加载成功")
-                # 配置 LoRA
-                try:
-                    self._setup_lora()
-                    print("LoRA 配置成功")
-                except Exception as e:
-                    print(f"LoRA 配置失败: {e}")
-                    raise
-                return  # 成功加载后退出
-            except Exception as e:
-                last_exception = e
-                print(f"当前配置加载失败: {e}")
-                continue
-        # 如果所有配置都失败
-        raise RuntimeError(f"所有模型加载配置均失败，最后的错误: {last_exception}")
-    def _setup_lora(self):
-        """配置 LoRA"""
         self.lora_config = LoraConfig(
-            r=4,
             lora_alpha=16,
             target_modules=["q_proj", "v_proj"],
             lora_dropout=0.05,

         self.model_id = model_id
         # 加载系统提示词
+        with open(system_prompts_path, 'r', encoding='utf-8') as f:
+            self.system_prompts = json.load(f)
+        # 修改模型初始化参数
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            model_id,
+            trust_remote_code=True
+        )
+        # 修改这部分的初始化参数
+        self.model = AutoModelForCausalLM.from_pretrained(
+            model_id,
+            trust_remote_code=True,
+            torch_dtype=torch.float32,  # 使用 torch.float32 而不是字符串
+            device_map='auto',          # 自动选择设备
+            low_cpu_mem_usage=True
+        )
+        # 使用更轻量的LoRA配置
         self.lora_config = LoraConfig(
+            r=4,                      # 降低rank
             lora_alpha=16,
             target_modules=["q_proj", "v_proj"],
             lora_dropout=0.05,