md896 committed on
Commit
b2ce6c6
·
1 Parent(s): 711ae38

Fix vllm error cleanly by creating fake python module structure

Browse files
Files changed (1) hide show
  1. ultimate_sota_training.py +14 -1
ultimate_sota_training.py CHANGED
@@ -69,7 +69,6 @@ def bootstrap_deps() -> None:
69
  "trl>=0.18.2,<0.24.0",
70
  "mergekit",
71
  "llm-blender",
72
- "vllm",
73
  "weave",
74
  "wandb",
75
  "matplotlib",
@@ -105,6 +104,20 @@ import transformers.utils.hub
105
  if not hasattr(transformers.utils.hub, "TRANSFORMERS_CACHE"):
106
  transformers.utils.hub.TRANSFORMERS_CACHE = "/tmp"
107
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  from trl import GRPOConfig, GRPOTrainer
109
  from unsloth import FastLanguageModel
110
 
 
69
  "trl>=0.18.2,<0.24.0",
70
  "mergekit",
71
  "llm-blender",
 
72
  "weave",
73
  "wandb",
74
  "matplotlib",
 
104
  if not hasattr(transformers.utils.hub, "TRANSFORMERS_CACHE"):
105
  transformers.utils.hub.TRANSFORMERS_CACHE = "/tmp"
106
 
107
+ # CRITICAL FIX for vllm crash:
108
+ # Create a valid Python module on disk to satisfy both importlib and TRL's hard imports.
109
+ import os
110
+ import sys
111
+ vllm_dir = "/tmp/fake_vllm"
112
+ os.makedirs(os.path.join(vllm_dir, "vllm", "distributed", "device_communicators", "pynccl"), exist_ok=True)
113
+ open(os.path.join(vllm_dir, "vllm", "__init__.py"), "w").close()
114
+ open(os.path.join(vllm_dir, "vllm", "distributed", "__init__.py"), "w").close()
115
+ open(os.path.join(vllm_dir, "vllm", "distributed", "device_communicators", "__init__.py"), "w").close()
116
+ with open(os.path.join(vllm_dir, "vllm", "distributed", "device_communicators", "pynccl", "__init__.py"), "w") as f:
117
+ f.write("class PyNcclCommunicator: pass\n")
118
+ if vllm_dir not in sys.path:
119
+ sys.path.insert(0, vllm_dir)
120
+
121
  from trl import GRPOConfig, GRPOTrainer
122
  from unsloth import FastLanguageModel
123