File size: 9,061 Bytes
57d76cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
#!/usr/bin/env python3
"""
Integration Guide: Add Authentication to Existing Training Code

This script shows how to integrate Hugging Face authentication into your
existing OpenLLM training code. Copy the relevant parts into your training script.

Usage:
    Use this as a reference to update your existing training code.
"""

import os
import sys
import json

try:
    from huggingface_hub import HfApi, login, whoami, create_repo
    HF_AVAILABLE = True
except ImportError:
    HF_AVAILABLE = False
    print("❌ huggingface_hub not installed")
    sys.exit(1)


def setup_hf_authentication():
    """
    Log in to the Hugging Face Hub using the token from the environment.

    The token is read from the ``HF_TOKEN`` environment variable (the guide
    expects it to be provided through GitHub repository secrets). After
    logging in, the account behind the token is looked up with ``whoami()``.
    Add this function to your training script.

    Returns:
        A ``(api, username)`` tuple: a freshly created ``HfApi`` client and
        the ``"name"`` field of the ``whoami()`` response.

    Raises:
        ValueError: If ``HF_TOKEN`` is unset or empty.
        Exception: Any other failure inside the login sequence is reported
            on stdout and then re-raised unchanged.
    """
    print("πŸ” Setting up Hugging Face Authentication")
    print("-" * 40)

    try:
        # The token is expected to be injected from GitHub secrets.
        hf_token = os.getenv("HF_TOKEN")
        if not hf_token:
            raise ValueError("HF_TOKEN not found. Please set it in GitHub repository secrets.")

        login(token=hf_token)

        # Build the client first, then resolve which account we are.
        client = HfApi()
        account_name = whoami()["name"]

        print("βœ… Authentication successful!")
        print(f"   - Username: {account_name}")
        print("   - Source: GitHub secrets")

        return client, account_name

    except Exception as err:
        # Report the problem for the CI log, but let the caller decide
        # how to handle it.
        print(f"❌ Authentication failed: {err}")
        raise


# (hidden size, layer count, attention heads) for each named model size.
_MODEL_SIZE_PRESETS = {
    "small": (768, 12, 12),
    "medium": (1024, 24, 16),
}
# Any size name not listed above falls back to the largest preset.
_DEFAULT_SIZE_PRESET = (1280, 32, 20)


def _build_model_config(model_size):
    """Return the ``config.json`` payload for the given model size name."""
    n_embd, n_layer, n_head = _MODEL_SIZE_PRESETS.get(model_size, _DEFAULT_SIZE_PRESET)
    return {
        "architectures": ["GPTModel"],
        "model_type": "gpt",
        "vocab_size": 32000,
        "n_positions": 2048,
        "n_embd": n_embd,
        "n_layer": n_layer,
        "n_head": n_head,
        "bos_token_id": 1,
        "eos_token_id": 2,
        "pad_token_id": 0,
        "unk_token_id": 3,
        "transformers_version": "4.35.0",
        "use_cache": True,
    }


def _build_model_card(model_size, steps, repo_id):
    """Return the ``README.md`` (model card) text for the uploaded model."""
    return f"""# OpenLLM {model_size.capitalize()} Model ({steps} steps)

This is a trained OpenLLM {model_size} model with extended training.

## Model Details
- **Model Type**: GPT-style decoder-only transformer
- **Architecture**: Custom OpenLLM implementation
- **Training Data**: SQUAD dataset (Wikipedia passages)
- **Vocabulary Size**: 32,000 tokens
- **Sequence Length**: 2,048 tokens
- **Model Size**: {model_size.capitalize()}
- **Training Steps**: {steps:,}

## Usage
This model can be used with the OpenLLM framework for text generation and language modeling tasks.

## License
This model is released under the GNU General Public License v3.0.

## Repository
This model is hosted on Hugging Face Hub: https://huggingface.co/{repo_id}
"""


def upload_model_after_training(api, username, model_dir, model_size="small", steps=8000):
    """
    Upload the trained model to Hugging Face Hub.
    Call this function after your training completes.

    The function writes ``config.json`` and ``README.md`` into ``model_dir``
    (replacing any files already stored under those names), makes sure the
    public model repository ``{username}/openllm-{model_size}-extended-{steps//1000}k``
    exists, and uploads the whole directory to it.

    Args:
        api: Hub client used for both repository creation and the upload,
            so its credentials and endpoint apply to every Hub call.
        username: Account or organisation that owns the repository.
        model_dir: Existing local directory holding the trained model files.
        model_size: ``"small"`` or ``"medium"``; any other value selects the
            largest preset.
        steps: Number of training steps; used in the repository name, the
            model card and the commit message.

    Returns:
        The repository id, ``"{username}/{repo_name}"``.

    Raises:
        FileNotFoundError: If ``model_dir`` is not an existing directory.
            Raised before anything is created on the Hub.
        Exception: Any other failure is reported on stdout and re-raised.
    """
    try:
        repo_name = f"openllm-{model_size}-extended-{steps//1000}k"
        repo_id = f"{username}/{repo_name}"

        print(f"\nπŸ“€ Uploading model to {repo_id}")

        # Fail fast: without a model directory there is nothing to upload,
        # and we do not want to leave an empty repository behind.
        if not os.path.isdir(model_dir):
            raise FileNotFoundError(f"Model directory not found: {model_dir}")

        # Prepare the local metadata files before touching the Hub, so a
        # local write error cannot leave a half-initialised remote repo.
        # Explicit UTF-8 keeps the output independent of the platform locale.
        config_path = os.path.join(model_dir, "config.json")
        with open(config_path, "w", encoding="utf-8") as f:
            json.dump(_build_model_config(model_size), f, indent=2)

        readme_path = os.path.join(model_dir, "README.md")
        with open(readme_path, "w", encoding="utf-8") as f:
            f.write(_build_model_card(model_size, steps, repo_id))

        # Create the repository through the client that was passed in, so
        # the same token/endpoint is used for creation and for the upload.
        api.create_repo(
            repo_id=repo_id,
            repo_type="model",
            exist_ok=True,
            private=False,
        )

        # Upload all files
        api.upload_folder(
            folder_path=model_dir,
            repo_id=repo_id,
            repo_type="model",
            commit_message=f"Add OpenLLM {model_size} model ({steps} steps)",
        )

        print("βœ… Model uploaded successfully!")
        print(f"   - Repository: https://huggingface.co/{repo_id}")

        return repo_id

    except Exception as e:
        print(f"❌ Upload failed: {e}")
        raise


# ============================================================================
# INTEGRATION EXAMPLE: How to modify your existing training code
# ============================================================================

def example_integration():
    """
    Demonstrate the three integration steps end to end.

    Authenticates against the Hub, stands in for a training run by writing
    a placeholder ``best_model.pt`` into ``./openllm-trained``, and then
    uploads that directory as a "small" model trained for 8000 steps.
    """
    print("πŸš€ Example: Integrating Authentication into Training")
    print("=" * 55)

    # Step 1: authenticate before any training work starts.
    print("\n1️⃣ Setting up authentication...")
    hub_api, hub_user = setup_hf_authentication()

    # Step 2: this is the place where the real training loop would run.
    print("\n2️⃣ Running your existing training code...")
    print("   - This is where your actual training happens")
    print("   - Training saves model to: ./openllm-trained")

    # Stand-in for a finished training run: an output directory that
    # contains a single placeholder checkpoint.
    output_dir = "./openllm-trained"
    os.makedirs(output_dir, exist_ok=True)

    checkpoint_path = os.path.join(output_dir, "best_model.pt")
    with open(checkpoint_path, "w") as checkpoint:
        checkpoint.write("Dummy model file")

    print("   βœ… Training completed!")

    # Step 3: publish the contents of the output directory.
    print("\n3️⃣ Uploading model...")
    upload_args = {
        "api": hub_api,
        "username": hub_user,
        "model_dir": output_dir,
        "model_size": "small",
        "steps": 8000,
    }
    published_repo = upload_model_after_training(**upload_args)

    print(f"\nπŸŽ‰ Success! Model available at: https://huggingface.co/{published_repo}")


# ============================================================================
# CODE SNIPPETS FOR YOUR EXISTING TRAINING SCRIPT
# ============================================================================

def get_code_snippets():
    """
    Return the copy-paste snippets for an existing training script.

    The result is one multi-line string containing the required imports,
    condensed versions of the authentication and upload helpers, and a
    sample ``main()`` that wires them together.
    """
    # Single-quoted triple delimiters let the embedded docstrings keep their
    # plain double-quote form without escaping.
    return '''
# ============================================================================
# ADD THESE IMPORTS TO YOUR TRAINING SCRIPT
# ============================================================================

import os
from huggingface_hub import HfApi, login, whoami, create_repo
import json

# ============================================================================
# ADD THIS FUNCTION TO YOUR TRAINING SCRIPT
# ============================================================================

def setup_hf_authentication():
    """Set up Hugging Face authentication using GitHub secrets."""
    token = os.getenv("HF_TOKEN")
    if not token:
        raise ValueError("HF_TOKEN not found. Please set it in GitHub repository secrets.")
    
    login(token=token)
    api = HfApi()
    user_info = whoami()
    username = user_info["name"]
    
    print(f"βœ… Authentication successful: {username}")
    return api, username

# ============================================================================
# ADD THIS FUNCTION TO YOUR TRAINING SCRIPT
# ============================================================================

def upload_model_after_training(api, username, model_dir, model_size="small", steps=8000):
    """Upload the trained model to Hugging Face Hub."""
    repo_name = f"openllm-{model_size}-extended-{steps//1000}k"
    repo_id = f"{username}/{repo_name}"
    
    # Create repository
    create_repo(repo_id=repo_id, repo_type="model", exist_ok=True)
    
    # Upload all files
    api.upload_folder(
        folder_path=model_dir,
        repo_id=repo_id,
        repo_type="model",
        commit_message=f"Add OpenLLM {model_size} model ({steps} steps)"
    )
    
    print(f"βœ… Model uploaded: https://huggingface.co/{repo_id}")
    return repo_id

# ============================================================================
# MODIFY YOUR MAIN TRAINING FUNCTION
# ============================================================================

def main():
    # Step 1: Set up authentication
    api, username = setup_hf_authentication()
    
    # Step 2: Your existing training code
    # ... your training code here ...
    
    # Step 3: Upload after training
    model_dir = "./openllm-trained"  # Your model directory
    repo_id = upload_model_after_training(api, username, model_dir)
    
    print(f"πŸŽ‰ Training and upload completed!")

if __name__ == "__main__":
    main()
'''


def main():
    """Run the example integration, then print the reusable code snippets."""
    rule = "=" * 65

    print("πŸ”§ Integration Guide: Add Authentication to Existing Training")
    print(rule)

    # Part 1: the live walkthrough (authenticates and uploads).
    example_integration()

    # Part 2: the snippets, framed by a banner.
    print("\n" + rule)
    print("πŸ“ CODE SNIPPETS FOR YOUR EXISTING TRAINING SCRIPT")
    print(rule)
    snippet_text = get_code_snippets()
    print(snippet_text)


# Script entry point: run the demonstration only when this file is executed
# directly, so importing it to reuse the helper functions triggers no
# authentication or upload.
if __name__ == "__main__":
    main()