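"""Gradio interface for configuring and launching unsupervised fine-tuning of
unsloth/phi-4-unsloth-bnb-4bit on the George-API/cognitive-data dataset,
designed to run inside a Hugging Face Space."""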
import json
import os
import subprocess
import sys
import threading

import gradio as gr
import torch

def load_env_variables():
    """Load environment variables from system or .env file."""
    if os.environ.get("SPACE_ID"):
        print("Running in Hugging Face Space")
        if "/" in os.environ.get("SPACE_ID", ""):
            username = os.environ.get("SPACE_ID").split("/")[0]
            os.environ["HF_USERNAME"] = username
            print(f"Set HF_USERNAME from SPACE_ID: {username}")
    else:
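        # Local development: fall back to a .env file when python-dotenv is installed.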
        try:
            from dotenv import load_dotenv
            env_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), ".env")
            if os.path.exists(env_path):
                load_dotenv(env_path)
                print(f"Loaded environment variables from {env_path}")
        except ImportError:
            print("python-dotenv not installed, skipping .env loading")

def check_environment():
    """Check the environment for GPU availability and other requirements."""
    env_info = {
        "System": {
            "Platform": sys.platform,
            "Python Version": sys.version.split()[0]
        },
        "GPU": {
            "CUDA Available": torch.cuda.is_available(),
            "Device Count": torch.cuda.device_count() if torch.cuda.is_available() else 0
        },
        "Environment Variables": {
            "HF_TOKEN": bool(os.environ.get("HF_TOKEN")),
            "HF_USERNAME": bool(os.environ.get("HF_USERNAME")),
            "HF_SPACE_NAME": bool(os.environ.get("HF_SPACE_NAME"))
        }
    }
    
    if torch.cuda.is_available():
        env_info["GPU"]["Device Name"] = torch.cuda.get_device_name(0)
        env_info["GPU"]["Memory (GB)"] = round(torch.cuda.get_device_properties(0).total_memory / (1024**3), 2)
    
    return env_info

def run_training_process():
    """Run the training process using the configuration files."""
    try:
        current_dir = os.path.dirname(os.path.abspath(__file__))
        training_script = os.path.join(current_dir, "run_transformers_training.py")
        
        # Start the training process
        process = subprocess.Popen(
            [sys.executable, training_script],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1
        )
        
        # Process the output line by line
        for line in process.stdout:
            print(line.strip())
        
        process.wait()
        return process.returncode
    except Exception as e:
        print(f"Error in training process: {e}")
        return 1

def start_training(learning_rate, num_train_epochs, per_device_train_batch_size,
                   gradient_accumulation_steps):
    """Start the training process with the specified parameters."""
    try:
        load_env_variables()
        current_dir = os.path.dirname(os.path.abspath(__file__))
        
        # Load and update transformers config
        with open(os.path.join(current_dir, "transformers_config.json"), "r") as f:
            config = json.load(f)
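        # The config file is assumed to contain at least "training" and
        # "huggingface_hub" sections (illustrative shape, other keys pass through):
        #   {"training": {...}, "huggingface_hub": {"hub_model_id": "user/model"}}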
        
        # Update training parameters
        config["training"].update({
            "num_train_epochs": num_train_epochs,
            "learning_rate": learning_rate,
            "per_device_train_batch_size": per_device_train_batch_size,
            "gradient_accumulation_steps": gradient_accumulation_steps
        })
        
        # Update hub settings if username is available
        if os.environ.get("HF_USERNAME"):
            config["huggingface_hub"].update({
                "hub_model_id": f"{os.environ['HF_USERNAME']}/Phi4-Cognitive-Science"
            })
        
        # Save updated config
        with open(os.path.join(current_dir, "transformers_config.json"), "w") as f:
            json.dump(config, f, indent=4)
        
        # Start training in a separate thread
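        # daemon=True keeps this watcher thread from blocking interpreter shutdown.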
        thread = threading.Thread(target=run_training_process)
        thread.daemon = True
        thread.start()
        
        return "Training started! Check the Hugging Face Space logs for progress."
    except Exception as e:
        return f"Error starting training: {str(e)}"

with gr.Blocks(title="Phi-4 Training Interface") as demo:
    gr.Markdown("# Phi-4 Unsupervised Training for Cognitive Science")
    
    with gr.Tab("Training"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("## Model Configuration")
                gr.Markdown("**Model**: unsloth/phi-4-unsloth-bnb-4bit")
                gr.Markdown("**Dataset**: George-API/cognitive-data")
                
                gr.Markdown("## Training Parameters")
                learning_rate = gr.Slider(minimum=1e-6, maximum=1e-4, value=2e-5, step=1e-6, 
                                       label="Learning Rate")
                num_train_epochs = gr.Slider(minimum=1, maximum=5, value=3, step=1, 
                                          label="Number of Epochs")
                per_device_train_batch_size = gr.Slider(minimum=4, maximum=24, value=12, step=4, 
                                                      label="Per Device Train Batch Size (Unsloth Optimized)")
                gradient_accumulation_steps = gr.Slider(minimum=1, maximum=8, value=4, step=1, 
                                                     label="Gradient Accumulation Steps")
                
                start_btn = gr.Button("Start Training", variant="primary")
                training_output = gr.Textbox(label="Training Output", interactive=False)
    
    with gr.Tab("Environment"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("## Environment Information")
                env_info = gr.JSON(label="Environment Info")
                check_env_btn = gr.Button("Check Environment")
    
    # Set up event handlers
    start_btn.click(
        fn=start_training,
        inputs=[learning_rate, num_train_epochs, per_device_train_batch_size, gradient_accumulation_steps],
        outputs=training_output
    )
    
    check_env_btn.click(
        fn=check_environment,
        inputs=[],
        outputs=env_info
    )

if __name__ == "__main__":
    load_env_variables()
    demo.launch()