winglian committed on
Commit
851ccb1
1 Parent(s): 18cabc0

Bump DeepSpeed to pick up the fix for grad-norm computation placing tensors on different devices (#1699)

Browse files
Files changed (2) hide show
  1. requirements.txt +1 -1
  2. setup.py +1 -1
requirements.txt CHANGED
@@ -5,7 +5,7 @@ transformers==4.41.1
5
  tokenizers==0.19.1
6
  bitsandbytes==0.43.1
7
  accelerate==0.30.1
8
- deepspeed==0.14.2
9
  pydantic==2.6.3
10
  addict
11
  fire
 
5
  tokenizers==0.19.1
6
  bitsandbytes==0.43.1
7
  accelerate==0.30.1
8
+ deepspeed @ git+https://github.com/microsoft/DeepSpeed.git@bc48371c5e1fb8fd70fc79285e66201dbb65679b
9
  pydantic==2.6.3
10
  addict
11
  fire
setup.py CHANGED
@@ -83,7 +83,7 @@ setup(
83
  "fused-dense-lib @ git+https://github.com/Dao-AILab/flash-attention@v2.5.8#subdirectory=csrc/fused_dense_lib",
84
  ],
85
  "deepspeed": [
86
- "deepspeed==0.14.2",
87
  "deepspeed-kernels",
88
  ],
89
  "mamba-ssm": [
 
83
  "fused-dense-lib @ git+https://github.com/Dao-AILab/flash-attention@v2.5.8#subdirectory=csrc/fused_dense_lib",
84
  ],
85
  "deepspeed": [
86
+ "deepspeed @ git+https://github.com/microsoft/DeepSpeed.git@bc48371c5e1fb8fd70fc79285e66201dbb65679b",
87
  "deepspeed-kernels",
88
  ],
89
  "mamba-ssm": [