lemms committed on
Commit
db8b6e2
·
verified ·
1 Parent(s): 49a60b2

feat: Sync training infrastructure from main repository

Browse files
Files changed (2) hide show
  1. requirements.txt +21 -38
  2. training/model.py +16 -4
requirements.txt CHANGED
@@ -1,40 +1,23 @@
1
- # OpenLLM Training Space Requirements
2
- # Core dependencies for Space deployment
3
-
4
- # Hugging Face Hub for authentication and model upload
5
- huggingface_hub>=0.19.0
6
-
7
- # Gradio for web interface (latest stable version with security fixes)
8
- gradio>=5.31.0
9
-
10
- # PyTorch for model training
11
  torch>=2.0.0
12
- torchvision>=0.15.0
13
-
14
- # Transformers for model handling
15
- transformers>=4.35.0
16
-
17
- # SentencePiece for tokenization
18
- sentencepiece>=0.1.99
19
-
20
- # NumPy and other utilities
21
- numpy>=1.24.0
22
- pandas>=2.0.0
23
-
24
- # Additional utilities
25
- requests>=2.31.0
26
- tqdm>=4.65.0
27
-
28
- # Testing dependencies
29
- pytest>=7.0.0
30
- pytest-cov>=4.0.0
31
-
32
- # Development dependencies
33
- black>=23.0.0
34
- isort>=5.12.0
35
- bandit>=1.7.7
36
- safety>=2.3.0
37
 
38
- # FastAPI for inference server
39
- fastapi>=0.100.0
40
- uvicorn>=0.23.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core Machine Learning Dependencies
2
+ # PyTorch - Deep learning framework for model training and inference
 
 
 
 
 
 
 
 
3
  torch>=2.0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
+ # Hugging Face Ecosystem - Model loading, training, and tokenization
6
+ transformers>=4.30.0 # Pre-trained models and training utilities
7
+ datasets>=2.12.0 # Dataset loading and processing
8
+ tokenizers>=0.13.0 # Fast tokenization library
9
+ sentencepiece>=0.1.99 # SentencePiece tokenization
10
+ huggingface_hub>=0.34.0 # Hugging Face Hub integration
11
+ accelerate>=0.20.0 # Distributed training acceleration
12
+
13
+ # User Interface - Gradio for web-based training interface
14
+ gradio>=4.0.0 # Web UI framework for ML applications
15
+
16
+ # Data Processing and Utilities
17
+ numpy>=1.24.0 # Numerical computing library
18
+ pandas>=2.0.0 # Data manipulation and analysis
19
+ tqdm>=4.65.0 # Progress bars for long-running operations
20
+ psutil>=5.9.0 # System and process utilities
21
+
22
+ # Note: These versions are compatible with Hugging Face Spaces
23
+ # and provide stable training performance
training/model.py CHANGED
@@ -414,12 +414,13 @@ class GPTModel(nn.Module):
414
  - Text generation (inference)
415
  """
416
 
417
- def __init__(self, config: GPTConfig):
418
  super().__init__()
419
  assert config.vocab_size is not None, "vocab_size must be specified"
420
  assert config.block_size is not None, "block_size must be specified"
421
 
422
  self.config = config
 
423
 
424
  # Embeddings
425
  self.transformer = nn.ModuleDict(
@@ -504,9 +505,20 @@ class GPTModel(nn.Module):
504
  # Combine embeddings and apply dropout
505
  x = self.transformer.drop(tok_emb + pos_emb)
506
 
507
- # Pass through transformer blocks
508
- for block in self.transformer.h:
509
- x = block(x)
 
 
 
 
 
 
 
 
 
 
 
510
 
511
  # Final layer normalization
512
  x = self.transformer.ln_f(x)
 
414
  - Text generation (inference)
415
  """
416
 
417
+ def __init__(self, config: GPTConfig, use_checkpoint=True):
418
  super().__init__()
419
  assert config.vocab_size is not None, "vocab_size must be specified"
420
  assert config.block_size is not None, "block_size must be specified"
421
 
422
  self.config = config
423
+ self.use_checkpoint = use_checkpoint
424
 
425
  # Embeddings
426
  self.transformer = nn.ModuleDict(
 
505
  # Combine embeddings and apply dropout
506
  x = self.transformer.drop(tok_emb + pos_emb)
507
 
508
+ # Pass through transformer blocks with optional gradient checkpointing
509
+ if self.use_checkpoint and self.training:
510
+ # Use gradient checkpointing to save memory during training
511
+ try:
512
+ for block in self.transformer.h:
513
+ x = torch.utils.checkpoint.checkpoint(block, x)
514
+ except AttributeError:
515
+ # Fallback for older PyTorch versions
516
+ for block in self.transformer.h:
517
+ x = block(x)
518
+ else:
519
+ # Standard forward pass
520
+ for block in self.transformer.h:
521
+ x = block(x)
522
 
523
  # Final layer normalization
524
  x = self.transformer.ln_f(x)