feat: Sync training infrastructure from main repository
Browse files
- requirements.txt +21 -38
- training/model.py +16 -4
requirements.txt
CHANGED
@@ -1,40 +1,23 @@
|
|
1 |
-
#
|
2 |
-
#
|
3 |
-
|
4 |
-
# Hugging Face Hub for authentication and model upload
|
5 |
-
huggingface_hub>=0.19.0
|
6 |
-
|
7 |
-
# Gradio for web interface (latest stable version with security fixes)
|
8 |
-
gradio>=5.31.0
|
9 |
-
|
10 |
-
# PyTorch for model training
|
11 |
torch>=2.0.0
|
12 |
-
torchvision>=0.15.0
|
13 |
-
|
14 |
-
# Transformers for model handling
|
15 |
-
transformers>=4.35.0
|
16 |
-
|
17 |
-
# SentencePiece for tokenization
|
18 |
-
sentencepiece>=0.1.99
|
19 |
-
|
20 |
-
# NumPy and other utilities
|
21 |
-
numpy>=1.24.0
|
22 |
-
pandas>=2.0.0
|
23 |
-
|
24 |
-
# Additional utilities
|
25 |
-
requests>=2.31.0
|
26 |
-
tqdm>=4.65.0
|
27 |
-
|
28 |
-
# Testing dependencies
|
29 |
-
pytest>=7.0.0
|
30 |
-
pytest-cov>=4.0.0
|
31 |
-
|
32 |
-
# Development dependencies
|
33 |
-
black>=23.0.0
|
34 |
-
isort>=5.12.0
|
35 |
-
bandit>=1.7.7
|
36 |
-
safety>=2.3.0
|
37 |
|
38 |
-
#
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Core Machine Learning Dependencies
|
2 |
+
# PyTorch - Deep learning framework for model training and inference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
torch>=2.0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
+
# Hugging Face Ecosystem - Model loading, training, and tokenization
|
6 |
+
transformers>=4.30.0 # Pre-trained models and training utilities
|
7 |
+
datasets>=2.12.0 # Dataset loading and processing
|
8 |
+
tokenizers>=0.13.0 # Fast tokenization library
|
9 |
+
sentencepiece>=0.1.99 # SentencePiece tokenization
|
10 |
+
huggingface_hub>=0.34.0 # Hugging Face Hub integration
|
11 |
+
accelerate>=0.20.0 # Distributed training acceleration
|
12 |
+
|
13 |
+
# User Interface - Gradio for web-based training interface
|
14 |
+
gradio>=4.0.0 # Web UI framework for ML applications
|
15 |
+
|
16 |
+
# Data Processing and Utilities
|
17 |
+
numpy>=1.24.0 # Numerical computing library
|
18 |
+
pandas>=2.0.0 # Data manipulation and analysis
|
19 |
+
tqdm>=4.65.0 # Progress bars for long-running operations
|
20 |
+
psutil>=5.9.0 # System and process utilities
|
21 |
+
|
22 |
+
# Note: These versions are compatible with Hugging Face Spaces
|
23 |
+
# and provide stable training performance
|
training/model.py
CHANGED
@@ -414,12 +414,13 @@ class GPTModel(nn.Module):
|
|
414 |
- Text generation (inference)
|
415 |
"""
|
416 |
|
417 |
-
def __init__(self, config: GPTConfig):
|
418 |
super().__init__()
|
419 |
assert config.vocab_size is not None, "vocab_size must be specified"
|
420 |
assert config.block_size is not None, "block_size must be specified"
|
421 |
|
422 |
self.config = config
|
|
|
423 |
|
424 |
# Embeddings
|
425 |
self.transformer = nn.ModuleDict(
|
@@ -504,9 +505,20 @@ class GPTModel(nn.Module):
|
|
504 |
# Combine embeddings and apply dropout
|
505 |
x = self.transformer.drop(tok_emb + pos_emb)
|
506 |
|
507 |
-
# Pass through transformer blocks
|
508 |
-
|
509 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
510 |
|
511 |
# Final layer normalization
|
512 |
x = self.transformer.ln_f(x)
|
|
|
414 |
- Text generation (inference)
|
415 |
"""
|
416 |
|
417 |
+
def __init__(self, config: GPTConfig, use_checkpoint=True):
|
418 |
super().__init__()
|
419 |
assert config.vocab_size is not None, "vocab_size must be specified"
|
420 |
assert config.block_size is not None, "block_size must be specified"
|
421 |
|
422 |
self.config = config
|
423 |
+
self.use_checkpoint = use_checkpoint
|
424 |
|
425 |
# Embeddings
|
426 |
self.transformer = nn.ModuleDict(
|
|
|
505 |
# Combine embeddings and apply dropout
|
506 |
x = self.transformer.drop(tok_emb + pos_emb)
|
507 |
|
508 |
+
# Pass through transformer blocks with optional gradient checkpointing
|
509 |
+
if self.use_checkpoint and self.training:
|
510 |
+
# Use gradient checkpointing to save memory during training
|
511 |
+
try:
|
512 |
+
for block in self.transformer.h:
|
513 |
+
x = torch.utils.checkpoint.checkpoint(block, x)
|
514 |
+
except AttributeError:
|
515 |
+
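# Fallback when torch.utils.checkpoint cannot be resolved (AttributeError); NOTE(review): this also catches an AttributeError raised inside a block - confirm that is intended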
|
516 |
+
for block in self.transformer.h:
|
517 |
+
x = block(x)
|
518 |
+
else:
|
519 |
+
# Standard forward pass
|
520 |
+
for block in self.transformer.h:
|
521 |
+
x = block(x)
|
522 |
|
523 |
# Final layer normalization
|
524 |
x = self.transformer.ln_f(x)
|