lemms committed on
Commit
57d76cc
·
verified ·
1 Parent(s): b3c83da

Add integration guide

Browse files
Files changed (1) hide show
  1. integrate_auth_into_training.py +283 -0
integrate_auth_into_training.py ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""
Integration Guide: Add Authentication to Existing Training Code

This script shows how to integrate Hugging Face authentication into your
existing OpenLLM training code. Copy the relevant parts into your training script.

Usage:
    Use this as a reference to update your existing training code.
"""

import os
import sys
import json

# huggingface_hub is required by every function in this guide, so a missing
# install aborts immediately instead of failing later with a NameError.
try:
    from huggingface_hub import HfApi, login, whoami, create_repo
    HF_AVAILABLE = True
except ImportError:
    HF_AVAILABLE = False
    print("❌ huggingface_hub not installed")
    sys.exit(1)
23
+
24
+
25
def setup_hf_authentication():
    """
    Set up Hugging Face authentication using GitHub secrets.
    Add this function to your training script.

    The access token is read from the ``HF_TOKEN`` environment variable,
    passed to ``login`` and used to build the ``HfApi`` client that is
    returned to the caller.

    Returns:
        tuple: ``(api, username)`` where ``api`` is the ``HfApi`` client and
        ``username`` is the ``"name"`` field of the ``whoami`` response.

    Raises:
        ValueError: If ``HF_TOKEN`` is unset, empty, or only whitespace.
        Exception: Any other failure is reported on stdout and re-raised
            unchanged.
    """
    print("🔐 Setting up Hugging Face Authentication")
    print("-" * 40)

    try:
        # Get token from GitHub secrets (exposed to the job as an env var).
        # Surrounding whitespace is stripped so that a secret saved with a
        # trailing newline is not sent verbatim, and a blank value is
        # reported as missing rather than attempted as a login.
        token = (os.getenv("HF_TOKEN") or "").strip()
        if not token:
            raise ValueError("HF_TOKEN not found. Please set it in GitHub repository secrets.")

        # Login
        login(token=token)

        # Get user info through a client bound to this exact token, so the
        # returned client does not depend on ambient login state.
        api = HfApi(token=token)
        user_info = api.whoami()
        username = user_info["name"]

        print("✅ Authentication successful!")
        print(f" - Username: {username}")
        print(" - Source: GitHub secrets")

        return api, username

    except Exception as e:
        # Report for the CI log, then let the caller decide how to fail.
        print(f"❌ Authentication failed: {e}")
        raise
56
+
57
+
58
# Transformer dimensions for the named model sizes. Any size other than
# "small" or "medium" uses the largest preset below.
_MODEL_DIMENSIONS = {
    "small": {"n_embd": 768, "n_layer": 12, "n_head": 12},
    "medium": {"n_embd": 1024, "n_layer": 24, "n_head": 16},
}
_FALLBACK_MODEL_DIMENSIONS = {"n_embd": 1280, "n_layer": 32, "n_head": 20}


def _build_model_config(model_size):
    """Return the dictionary written to config.json for ``model_size``."""
    dims = _MODEL_DIMENSIONS.get(model_size, _FALLBACK_MODEL_DIMENSIONS)
    return {
        "architectures": ["GPTModel"],
        "model_type": "gpt",
        "vocab_size": 32000,
        "n_positions": 2048,
        "n_embd": dims["n_embd"],
        "n_layer": dims["n_layer"],
        "n_head": dims["n_head"],
        "bos_token_id": 1,
        "eos_token_id": 2,
        "pad_token_id": 0,
        "unk_token_id": 3,
        "transformers_version": "4.35.0",
        "use_cache": True,
    }


def _render_model_card(model_size, steps, repo_id):
    """Return the README.md (model card) text for the uploaded model."""
    return f"""# OpenLLM {model_size.capitalize()} Model ({steps} steps)

This is a trained OpenLLM {model_size} model with extended training.

## Model Details
- **Model Type**: GPT-style decoder-only transformer
- **Architecture**: Custom OpenLLM implementation
- **Training Data**: SQUAD dataset (Wikipedia passages)
- **Vocabulary Size**: 32,000 tokens
- **Sequence Length**: 2,048 tokens
- **Model Size**: {model_size.capitalize()}
- **Training Steps**: {steps:,}

## Usage
This model can be used with the OpenLLM framework for text generation and language modeling tasks.

## License
This model is released under the GNU General Public License v3.0.

## Repository
This model is hosted on Hugging Face Hub: https://huggingface.co/{repo_id}
"""


def upload_model_after_training(api, username, model_dir, model_size="small", steps=8000):
    """
    Upload the trained model to Hugging Face Hub.
    Call this function after your training completes.

    Writes ``config.json`` and ``README.md`` into ``model_dir`` (replacing any
    files of the same name), ensures the target repository exists, and then
    uploads the whole folder.

    Args:
        api: Client exposing ``create_repo`` and ``upload_folder``
            (an ``HfApi`` instance).
        username: Account under which the repository is created.
        model_dir: Existing directory holding the trained model files.
        model_size: ``"small"`` or ``"medium"``; any other value uses the
            largest dimension preset.
        steps: Number of training steps; ``steps // 1000`` is used in the
            repository name.

    Returns:
        str: The repository id, ``"<username>/openllm-<size>-extended-<N>k"``.

    Raises:
        Exception: Any failure is reported on stdout and re-raised unchanged.
    """
    try:
        # Create repository name
        repo_name = f"openllm-{model_size}-extended-{steps//1000}k"
        repo_id = f"{username}/{repo_name}"

        print(f"\n📤 Uploading model to {repo_id}")

        # Create repository through the client that was passed in, so the
        # repo is created with the same credentials used for the upload.
        api.create_repo(
            repo_id=repo_id,
            repo_type="model",
            exist_ok=True,
            private=False,
        )

        # Create model configuration
        config_path = os.path.join(model_dir, "config.json")
        with open(config_path, "w", encoding="utf-8") as f:
            json.dump(_build_model_config(model_size), f, indent=2)

        # Create model card
        readme_path = os.path.join(model_dir, "README.md")
        with open(readme_path, "w", encoding="utf-8") as f:
            f.write(_render_model_card(model_size, steps, repo_id))

        # Upload all files
        api.upload_folder(
            folder_path=model_dir,
            repo_id=repo_id,
            repo_type="model",
            commit_message=f"Add OpenLLM {model_size} model ({steps} steps)",
        )

        print("✅ Model uploaded successfully!")
        print(f" - Repository: https://huggingface.co/{repo_id}")

        return repo_id

    except Exception as e:
        # Report for the CI log, then let the caller decide how to fail.
        print(f"❌ Upload failed: {e}")
        raise
143
+
144
+
145
# ============================================================================
# INTEGRATION EXAMPLE: How to modify your existing training code
# ============================================================================

def example_integration():
    """
    Example of how to integrate authentication into your existing training code.

    Runs the three steps in order: authenticate, stand in for training by
    making sure ``./openllm-trained`` contains a model file, then upload that
    directory. Nothing is returned; progress is printed to stdout.

    Raises:
        Exception: Whatever ``setup_hf_authentication`` or
            ``upload_model_after_training`` raises.
    """
    print("🚀 Example: Integrating Authentication into Training")
    print("=" * 55)

    # Step 1: Set up authentication at the start
    print("\n1️⃣ Setting up authentication...")
    api, username = setup_hf_authentication()

    # Step 2: Your existing training code goes here
    print("\n2️⃣ Running your existing training code...")
    print(" - This is where your actual training happens")
    print(" - Training saves model to: ./openllm-trained")

    # Simulate training completion
    model_dir = "./openllm-trained"
    os.makedirs(model_dir, exist_ok=True)

    # Create dummy model file, but never replace a checkpoint that a real
    # training run has already written to this directory.
    dummy_model_path = os.path.join(model_dir, "best_model.pt")
    if not os.path.exists(dummy_model_path):
        with open(dummy_model_path, "w", encoding="utf-8") as f:
            f.write("Dummy model file")

    print(" ✅ Training completed!")

    # Step 3: Upload model after training
    print("\n3️⃣ Uploading model...")
    repo_id = upload_model_after_training(
        api=api,
        username=username,
        model_dir=model_dir,
        model_size="small",
        steps=8000,
    )

    print(f"\n🎉 Success! Model available at: https://huggingface.co/{repo_id}")
186
+
187
+
188
# ============================================================================
# CODE SNIPPETS FOR YOUR EXISTING TRAINING SCRIPT
# ============================================================================

def get_code_snippets():
    """Show code snippets to add to your existing training script.

    Returns:
        str: Python source text containing the imports, the two helper
        functions and an example ``main`` to paste into a training script.
        The text is only returned, never executed here.
    """
    snippets = """
# ============================================================================
# ADD THESE IMPORTS TO YOUR TRAINING SCRIPT
# ============================================================================

import os
from huggingface_hub import HfApi, login, whoami, create_repo
import json

# ============================================================================
# ADD THIS FUNCTION TO YOUR TRAINING SCRIPT
# ============================================================================

def setup_hf_authentication():
    \"\"\"Set up Hugging Face authentication using GitHub secrets.\"\"\"
    token = os.getenv("HF_TOKEN")
    if not token:
        raise ValueError("HF_TOKEN not found. Please set it in GitHub repository secrets.")

    login(token=token)
    api = HfApi()
    user_info = whoami()
    username = user_info["name"]

    print(f"✅ Authentication successful: {username}")
    return api, username

# ============================================================================
# ADD THIS FUNCTION TO YOUR TRAINING SCRIPT
# ============================================================================

def upload_model_after_training(api, username, model_dir, model_size="small", steps=8000):
    \"\"\"Upload the trained model to Hugging Face Hub.\"\"\"
    repo_name = f"openllm-{model_size}-extended-{steps//1000}k"
    repo_id = f"{username}/{repo_name}"

    # Create repository
    create_repo(repo_id=repo_id, repo_type="model", exist_ok=True)

    # Upload all files
    api.upload_folder(
        folder_path=model_dir,
        repo_id=repo_id,
        repo_type="model",
        commit_message=f"Add OpenLLM {model_size} model ({steps} steps)"
    )

    print(f"✅ Model uploaded: https://huggingface.co/{repo_id}")
    return repo_id

# ============================================================================
# MODIFY YOUR MAIN TRAINING FUNCTION
# ============================================================================

def main():
    # Step 1: Set up authentication
    api, username = setup_hf_authentication()

    # Step 2: Your existing training code
    # ... your training code here ...

    # Step 3: Upload after training
    model_dir = "./openllm-trained"  # Your model directory
    repo_id = upload_model_after_training(api, username, model_dir)

    print(f"🎉 Training and upload completed!")

if __name__ == "__main__":
    main()
"""
    return snippets
265
+
266
+
267
def main():
    """Main function to demonstrate integration.

    Runs the live example (authenticate, simulate training, upload) and then
    prints the copy-paste snippets returned by ``get_code_snippets``.
    """
    print("🔧 Integration Guide: Add Authentication to Existing Training")
    print("=" * 65)

    # Show example integration
    example_integration()

    # Show code snippets
    print("\n" + "="*65)
    print("📝 CODE SNIPPETS FOR YOUR EXISTING TRAINING SCRIPT")
    print("="*65)
    print(get_code_snippets())


if __name__ == "__main__":
    main()