|
|
|
""" |
|
Integration Guide: Add Authentication to Existing Training Code |
|
|
|
This script shows how to integrate Hugging Face authentication into your |
|
existing OpenLLM training code. Copy the relevant parts into your training script. |
|
|
|
Usage: |
|
Use this as a reference to update your existing training code. |
|
""" |
|
|
|
import os |
|
import sys |
|
import json |
|
|
|
try: |
|
from huggingface_hub import HfApi, login, whoami, create_repo |
|
HF_AVAILABLE = True |
|
except ImportError: |
|
HF_AVAILABLE = False |
|
print("β huggingface_hub not installed") |
|
sys.exit(1) |
|
|
|
|
|
def setup_hf_authentication(): |
|
""" |
|
Set up Hugging Face authentication using GitHub secrets. |
|
Add this function to your training script. |
|
""" |
|
print("π Setting up Hugging Face Authentication") |
|
print("-" * 40) |
|
|
|
try: |
|
|
|
token = os.getenv("HF_TOKEN") |
|
if not token: |
|
raise ValueError("HF_TOKEN not found. Please set it in GitHub repository secrets.") |
|
|
|
|
|
login(token=token) |
|
|
|
|
|
api = HfApi() |
|
user_info = whoami() |
|
username = user_info["name"] |
|
|
|
print(f"β
Authentication successful!") |
|
print(f" - Username: {username}") |
|
print(f" - Source: GitHub secrets") |
|
|
|
return api, username |
|
|
|
except Exception as e: |
|
print(f"β Authentication failed: {e}") |
|
raise |
|
|
|
|
|
def upload_model_after_training(api, username, model_dir, model_size="small", steps=8000): |
|
""" |
|
Upload the trained model to Hugging Face Hub. |
|
Call this function after your training completes. |
|
""" |
|
try: |
|
|
|
repo_name = f"openllm-{model_size}-extended-{steps//1000}k" |
|
repo_id = f"{username}/{repo_name}" |
|
|
|
print(f"\nπ€ Uploading model to {repo_id}") |
|
|
|
|
|
create_repo( |
|
repo_id=repo_id, |
|
repo_type="model", |
|
exist_ok=True, |
|
private=False |
|
) |
|
|
|
|
|
config = { |
|
"architectures": ["GPTModel"], |
|
"model_type": "gpt", |
|
"vocab_size": 32000, |
|
"n_positions": 2048, |
|
"n_embd": 768 if model_size == "small" else 1024 if model_size == "medium" else 1280, |
|
"n_layer": 12 if model_size == "small" else 24 if model_size == "medium" else 32, |
|
"n_head": 12 if model_size == "small" else 16 if model_size == "medium" else 20, |
|
"bos_token_id": 1, |
|
"eos_token_id": 2, |
|
"pad_token_id": 0, |
|
"unk_token_id": 3, |
|
"transformers_version": "4.35.0", |
|
"use_cache": True |
|
} |
|
|
|
config_path = os.path.join(model_dir, "config.json") |
|
with open(config_path, "w") as f: |
|
json.dump(config, f, indent=2) |
|
|
|
|
|
model_card = f"""# OpenLLM {model_size.capitalize()} Model ({steps} steps) |
|
|
|
This is a trained OpenLLM {model_size} model with extended training. |
|
|
|
## Model Details |
|
- **Model Type**: GPT-style decoder-only transformer |
|
- **Architecture**: Custom OpenLLM implementation |
|
- **Training Data**: SQUAD dataset (Wikipedia passages) |
|
- **Vocabulary Size**: 32,000 tokens |
|
- **Sequence Length**: 2,048 tokens |
|
- **Model Size**: {model_size.capitalize()} |
|
- **Training Steps**: {steps:,} |
|
|
|
## Usage |
|
This model can be used with the OpenLLM framework for text generation and language modeling tasks. |
|
|
|
## License |
|
This model is released under the GNU General Public License v3.0. |
|
|
|
## Repository |
|
This model is hosted on Hugging Face Hub: https://huggingface.co/{repo_id} |
|
""" |
|
|
|
readme_path = os.path.join(model_dir, "README.md") |
|
with open(readme_path, "w") as f: |
|
f.write(model_card) |
|
|
|
|
|
api.upload_folder( |
|
folder_path=model_dir, |
|
repo_id=repo_id, |
|
repo_type="model", |
|
commit_message=f"Add OpenLLM {model_size} model ({steps} steps)" |
|
) |
|
|
|
print(f"β
Model uploaded successfully!") |
|
print(f" - Repository: https://huggingface.co/{repo_id}") |
|
|
|
return repo_id |
|
|
|
except Exception as e: |
|
print(f"β Upload failed: {e}") |
|
raise |
|
|
|
|
|
|
|
|
|
|
|
|
|
def example_integration(): |
|
""" |
|
Example of how to integrate authentication into your existing training code. |
|
""" |
|
print("π Example: Integrating Authentication into Training") |
|
print("=" * 55) |
|
|
|
|
|
print("\n1οΈβ£ Setting up authentication...") |
|
api, username = setup_hf_authentication() |
|
|
|
|
|
print("\n2οΈβ£ Running your existing training code...") |
|
print(" - This is where your actual training happens") |
|
print(" - Training saves model to: ./openllm-trained") |
|
|
|
|
|
model_dir = "./openllm-trained" |
|
os.makedirs(model_dir, exist_ok=True) |
|
|
|
|
|
with open(os.path.join(model_dir, "best_model.pt"), "w") as f: |
|
f.write("Dummy model file") |
|
|
|
print(" β
Training completed!") |
|
|
|
|
|
print("\n3οΈβ£ Uploading model...") |
|
repo_id = upload_model_after_training( |
|
api=api, |
|
username=username, |
|
model_dir=model_dir, |
|
model_size="small", |
|
steps=8000 |
|
) |
|
|
|
print(f"\nπ Success! Model available at: https://huggingface.co/{repo_id}") |
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_code_snippets(): |
|
"""Show code snippets to add to your existing training script.""" |
|
snippets = """ |
|
# ============================================================================ |
|
# ADD THESE IMPORTS TO YOUR TRAINING SCRIPT |
|
# ============================================================================ |
|
|
|
import os |
|
from huggingface_hub import HfApi, login, whoami, create_repo |
|
import json |
|
|
|
# ============================================================================ |
|
# ADD THIS FUNCTION TO YOUR TRAINING SCRIPT |
|
# ============================================================================ |
|
|
|
def setup_hf_authentication(): |
|
\"\"\"Set up Hugging Face authentication using GitHub secrets.\"\"\" |
|
token = os.getenv("HF_TOKEN") |
|
if not token: |
|
raise ValueError("HF_TOKEN not found. Please set it in GitHub repository secrets.") |
|
|
|
login(token=token) |
|
api = HfApi() |
|
user_info = whoami() |
|
username = user_info["name"] |
|
|
|
print(f"β
Authentication successful: {username}") |
|
return api, username |
|
|
|
# ============================================================================ |
|
# ADD THIS FUNCTION TO YOUR TRAINING SCRIPT |
|
# ============================================================================ |
|
|
|
def upload_model_after_training(api, username, model_dir, model_size="small", steps=8000): |
|
\"\"\"Upload the trained model to Hugging Face Hub.\"\"\" |
|
repo_name = f"openllm-{model_size}-extended-{steps//1000}k" |
|
repo_id = f"{username}/{repo_name}" |
|
|
|
# Create repository |
|
create_repo(repo_id=repo_id, repo_type="model", exist_ok=True) |
|
|
|
# Upload all files |
|
api.upload_folder( |
|
folder_path=model_dir, |
|
repo_id=repo_id, |
|
repo_type="model", |
|
commit_message=f"Add OpenLLM {model_size} model ({steps} steps)" |
|
) |
|
|
|
print(f"β
Model uploaded: https://huggingface.co/{repo_id}") |
|
return repo_id |
|
|
|
# ============================================================================ |
|
# MODIFY YOUR MAIN TRAINING FUNCTION |
|
# ============================================================================ |
|
|
|
def main(): |
|
# Step 1: Set up authentication |
|
api, username = setup_hf_authentication() |
|
|
|
# Step 2: Your existing training code |
|
# ... your training code here ... |
|
|
|
# Step 3: Upload after training |
|
model_dir = "./openllm-trained" # Your model directory |
|
repo_id = upload_model_after_training(api, username, model_dir) |
|
|
|
print(f"π Training and upload completed!") |
|
|
|
if __name__ == "__main__": |
|
main() |
|
""" |
|
return snippets |
|
|
|
|
|
def main(): |
|
"""Main function to demonstrate integration.""" |
|
print("π§ Integration Guide: Add Authentication to Existing Training") |
|
print("=" * 65) |
|
|
|
|
|
example_integration() |
|
|
|
|
|
print("\n" + "="*65) |
|
print("π CODE SNIPPETS FOR YOUR EXISTING TRAINING SCRIPT") |
|
print("="*65) |
|
print(get_code_snippets()) |
|
|
|
|
|
if __name__ == "__main__": |
|
main() |
|
|