lemms committed on
Commit
db8b6e2
·
verified ·
1 Parent(s): 49a60b2

feat: Sync training infrastructure from main repository

Browse files
Files changed (2) hide show
  1. requirements.txt +21 -38
  2. training/model.py +16 -4
requirements.txt CHANGED
@@ -1,40 +1,23 @@
1
- # OpenLLM Training Space Requirements
2
- # Core dependencies for Space deployment
3
-
4
- # Hugging Face Hub for authentication and model upload
5
- huggingface_hub>=0.19.0
6
-
7
- # Gradio for web interface (latest stable version with security fixes)
8
- gradio>=5.31.0
9
-
10
- # PyTorch for model training
11
  torch>=2.0.0
12
- torchvision>=0.15.0
13
-
14
- # Transformers for model handling
15
- transformers>=4.35.0
16
-
17
- # SentencePiece for tokenization
18
- sentencepiece>=0.1.99
19
-
20
- # NumPy and other utilities
21
- numpy>=1.24.0
22
- pandas>=2.0.0
23
-
24
- # Additional utilities
25
- requests>=2.31.0
26
- tqdm>=4.65.0
27
-
28
- # Testing dependencies
29
- pytest>=7.0.0
30
- pytest-cov>=4.0.0
31
-
32
- # Development dependencies
33
- black>=23.0.0
34
- isort>=5.12.0
35
- bandit>=1.7.7
36
- safety>=2.3.0
37
 
38
- # FastAPI for inference server
39
- fastapi>=0.100.0
40
- uvicorn>=0.23.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core Machine Learning Dependencies
2
+ # PyTorch - Deep learning framework for model training and inference
 
 
 
 
 
 
 
 
3
  torch>=2.0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
+ # Hugging Face Ecosystem - Model loading, training, and tokenization
6
+ transformers>=4.30.0 # Pre-trained models and training utilities
7
+ datasets>=2.12.0 # Dataset loading and processing
8
+ tokenizers>=0.13.0 # Fast tokenization library
9
+ sentencepiece>=0.1.99 # SentencePiece tokenization
10
+ huggingface_hub>=0.34.0 # Hugging Face Hub integration
11
+ accelerate>=0.20.0 # Distributed training acceleration
12
+
13
+ # User Interface - Gradio for web-based training interface
14
+ gradio>=4.0.0 # Web UI framework for ML applications
15
+
16
+ # Data Processing and Utilities
17
+ numpy>=1.24.0 # Numerical computing library
18
+ pandas>=2.0.0 # Data manipulation and analysis
19
+ tqdm>=4.65.0 # Progress bars for long-running operations
20
+ psutil>=5.9.0 # System and process utilities
21
+
22
+ # Note: These versions are compatible with Hugging Face Spaces
23
+ # and provide stable training performance
training/model.py CHANGED
@@ -414,12 +414,13 @@ class GPTModel(nn.Module):
414
  - Text generation (inference)
415
  """
416
 
417
- def __init__(self, config: GPTConfig):
418
  super().__init__()
419
  assert config.vocab_size is not None, "vocab_size must be specified"
420
  assert config.block_size is not None, "block_size must be specified"
421
 
422
  self.config = config
 
423
 
424
  # Embeddings
425
  self.transformer = nn.ModuleDict(
@@ -504,9 +505,20 @@ class GPTModel(nn.Module):
504
  # Combine embeddings and apply dropout
505
  x = self.transformer.drop(tok_emb + pos_emb)
506
 
507
- # Pass through transformer blocks
508
- for block in self.transformer.h:
509
- x = block(x)
 
 
 
 
 
 
 
 
 
 
 
510
 
511
  # Final layer normalization
512
  x = self.transformer.ln_f(x)
 
414
  - Text generation (inference)
415
  """
416
 
417
+ def __init__(self, config: GPTConfig, use_checkpoint=True):
418
  super().__init__()
419
  assert config.vocab_size is not None, "vocab_size must be specified"
420
  assert config.block_size is not None, "block_size must be specified"
421
 
422
  self.config = config
423
+ self.use_checkpoint = use_checkpoint
424
 
425
  # Embeddings
426
  self.transformer = nn.ModuleDict(
 
505
  # Combine embeddings and apply dropout
506
  x = self.transformer.drop(tok_emb + pos_emb)
507
 
508
+ # Pass through transformer blocks with optional gradient checkpointing
509
+ if self.use_checkpoint and self.training:
510
+ # Use gradient checkpointing to save memory during training
511
+ try:
512
+ for block in self.transformer.h:
513
+ x = torch.utils.checkpoint.checkpoint(block, x)
514
+ except AttributeError:
515
+ # Fallback for older PyTorch versions
516
+ for block in self.transformer.h:
517
+ x = block(x)
518
+ else:
519
+ # Standard forward pass
520
+ for block in self.transformer.h:
521
+ x = block(x)
522
 
523
  # Final layer normalization
524
  x = self.transformer.ln_f(x)