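"""Gradio interface for configuring and launching unsupervised fine-tuning of
unsloth/phi-4-unsloth-bnb-4bit on the George-API/cognitive-data dataset,
designed to run inside a Hugging Face Space."""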
import json
import os
import subprocess
import sys
import threading

import gradio as gr
import torch

def load_env_variables():
    """Load environment variables from system or .env file."""
    if os.environ.get("SPACE_ID"):
        print("Running in Hugging Face Space")
        if "/" in os.environ.get("SPACE_ID", ""):
            username = os.environ.get("SPACE_ID").split("/")[0]
            os.environ["HF_USERNAME"] = username
            print(f"Set HF_USERNAME from SPACE_ID: {username}")
    else:
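        # Local development: fall back to a .env file when python-dotenv is installed.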
        try:
            from dotenv import load_dotenv
            env_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), ".env")
            if os.path.exists(env_path):
                load_dotenv(env_path)
                print(f"Loaded environment variables from {env_path}")
        except ImportError:
            print("python-dotenv not installed, skipping .env loading")

def check_environment():
    """Check the environment for GPU availability and other requirements."""
    env_info = {
        "System": {
            "Platform": sys.platform,
            "Python Version": sys.version.split()[0]
        },
        "GPU": {
            "CUDA Available": torch.cuda.is_available(),
            "Device Count": torch.cuda.device_count() if torch.cuda.is_available() else 0
        },
        "Environment Variables": {
            "HF_TOKEN": bool(os.environ.get("HF_TOKEN")),
            "HF_USERNAME": bool(os.environ.get("HF_USERNAME")),
            "HF_SPACE_NAME": bool(os.environ.get("HF_SPACE_NAME"))
        }
    }
    
    if torch.cuda.is_available():
        env_info["GPU"]["Device Name"] = torch.cuda.get_device_name(0)
        env_info["GPU"]["Memory (GB)"] = round(torch.cuda.get_device_properties(0).total_memory / (1024**3), 2)
    
    return env_info

def run_training_process():
    """Run the training process using the configuration files."""
    try:
        current_dir = os.path.dirname(os.path.abspath(__file__))
        training_script = os.path.join(current_dir, "run_transformers_training.py")
        
        # Start the training process
        process = subprocess.Popen(
            [sys.executable, training_script],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1
        )
        
        # Process the output line by line
        for line in process.stdout:
            print(line.strip())
        
        process.wait()
        return process.returncode
    except Exception as e:
        print(f"Error in training process: {e}")
        return 1

def start_training(learning_rate, num_train_epochs, per_device_train_batch_size,
                   gradient_accumulation_steps):
    """Start the training process with the specified parameters."""
    try:
        load_env_variables()
        current_dir = os.path.dirname(os.path.abspath(__file__))
        
        # Load and update transformers config
        with open(os.path.join(current_dir, "transformers_config.json"), "r") as f:
            config = json.load(f)
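        # The config file is assumed to contain at least "training" and
        # "huggingface_hub" sections (illustrative shape, other keys pass through):
        #   {"training": {...}, "huggingface_hub": {"hub_model_id": "user/model"}}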
        
        # Update training parameters
        config["training"].update({
            "num_train_epochs": num_train_epochs,
            "learning_rate": learning_rate,
            "per_device_train_batch_size": per_device_train_batch_size,
            "gradient_accumulation_steps": gradient_accumulation_steps
        })
        
        # Update hub settings if username is available
        if os.environ.get("HF_USERNAME"):
            config["huggingface_hub"].update({
                "hub_model_id": f"{os.environ['HF_USERNAME']}/Phi4-Cognitive-Science"
            })
        
        # Save updated config
        with open(os.path.join(current_dir, "transformers_config.json"), "w") as f:
            json.dump(config, f, indent=4)
        
        # Start training in a separate thread
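        # daemon=True keeps this watcher thread from blocking interpreter shutdown.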
        thread = threading.Thread(target=run_training_process)
        thread.daemon = True
        thread.start()
        
        return "Training started! Check the Hugging Face Space logs for progress."
    except Exception as e:
        return f"Error starting training: {str(e)}"

with gr.Blocks(title="Phi-4 Training Interface") as demo:
    gr.Markdown("# Phi-4 Unsupervised Training for Cognitive Science")
    
    with gr.Tab("Training"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("## Model Configuration")
                gr.Markdown("**Model**: unsloth/phi-4-unsloth-bnb-4bit")
                gr.Markdown("**Dataset**: George-API/cognitive-data")
                
                gr.Markdown("## Training Parameters")
                learning_rate = gr.Slider(minimum=1e-6, maximum=1e-4, value=2e-5, step=1e-6, 
                                       label="Learning Rate")
                num_train_epochs = gr.Slider(minimum=1, maximum=5, value=3, step=1, 
                                          label="Number of Epochs")
                per_device_train_batch_size = gr.Slider(minimum=4, maximum=24, value=12, step=4, 
                                                      label="Per Device Train Batch Size (Unsloth Optimized)")
                gradient_accumulation_steps = gr.Slider(minimum=1, maximum=8, value=4, step=1, 
                                                     label="Gradient Accumulation Steps")
                
                start_btn = gr.Button("Start Training", variant="primary")
                training_output = gr.Textbox(label="Training Output", interactive=False)
    
    with gr.Tab("Environment"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("## Environment Information")
                env_info = gr.JSON(label="Environment Info")
                check_env_btn = gr.Button("Check Environment")
    
    # Set up event handlers
    start_btn.click(
        fn=start_training,
        inputs=[learning_rate, num_train_epochs, per_device_train_batch_size, gradient_accumulation_steps],
        outputs=training_output
    )
    
    check_env_btn.click(
        fn=check_environment,
        inputs=[],
        outputs=env_info
    )

if __name__ == "__main__":
    load_env_variables()
    demo.launch()