# Gradio app for configuring and launching Phi-4 fine-tuning on a Hugging Face Space.
import gradio as gr
import os
import subprocess
import sys
import json
import re
from threading import Thread
import datetime
import torch
import threading
def load_env_variables():
    """Load environment variables from system or .env file."""
    space_id = os.environ.get("SPACE_ID")
    if space_id:
        # Running inside a Hugging Face Space: derive HF_USERNAME from SPACE_ID.
        print("Running in Hugging Face Space")
        if "/" in space_id:
            username = space_id.split("/")[0]
            os.environ["HF_USERNAME"] = username
            print(f"Set HF_USERNAME from SPACE_ID: {username}")
    else:
        # Local run: best-effort load of a .env file next to this script.
        try:
            from dotenv import load_dotenv
        except ImportError:
            print("python-dotenv not installed, skipping .env loading")
        else:
            env_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), ".env")
            if os.path.exists(env_path):
                load_dotenv(env_path)
                print(f"Loaded environment variables from {env_path}")
def check_environment():
    """Check the environment for GPU availability and other requirements."""
    cuda_ok = torch.cuda.is_available()  # evaluate once, reused below
    report = {
        "System": {
            "Platform": sys.platform,
            "Python Version": sys.version.split()[0],
        },
        "GPU": {
            "CUDA Available": cuda_ok,
            "Device Count": torch.cuda.device_count() if cuda_ok else 0,
        },
        # Report presence (not values) of credentials/config, to avoid leaking secrets.
        "Environment Variables": {
            "HF_TOKEN": bool(os.environ.get("HF_TOKEN")),
            "HF_USERNAME": bool(os.environ.get("HF_USERNAME")),
            "HF_SPACE_NAME": bool(os.environ.get("HF_SPACE_NAME")),
        },
    }
    if cuda_ok:
        report["GPU"]["Device Name"] = torch.cuda.get_device_name(0)
        total_bytes = torch.cuda.get_device_properties(0).total_memory
        report["GPU"]["Memory (GB)"] = round(total_bytes / (1024 ** 3), 2)
    return report
def run_training_process():
    """Run the training process using the configuration files.

    Launches run_transformers_training.py (expected next to this file) as a
    subprocess, streams its combined stdout/stderr to our stdout line by
    line, and waits for it to finish.

    Returns:
        int: the subprocess exit code, or 1 if the process could not be run.
    """
    try:
        current_dir = os.path.dirname(os.path.abspath(__file__))
        training_script = os.path.join(current_dir, "run_transformers_training.py")
        # Use Popen as a context manager so the stdout pipe is closed and the
        # child is waited on even if reading raises (the original leaked the
        # pipe). stderr is merged into stdout; text mode, line-buffered.
        with subprocess.Popen(
            [sys.executable, training_script],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,
        ) as process:
            # Echo training output as it arrives.
            for line in process.stdout:
                print(line.strip())
        # __exit__ has already waited; returncode is now set.
        return process.returncode
    except Exception as e:
        print(f"Error in training process: {e}")
        return 1
def start_training(learning_rate, num_train_epochs, per_device_train_batch_size,
                   gradient_accumulation_steps):
    """Start the training process with the specified parameters.

    Overlays the UI-selected hyperparameters onto transformers_config.json
    (located next to this file), then launches training in a background
    daemon thread so the Gradio handler returns immediately.

    Args:
        learning_rate: Optimizer learning rate.
        num_train_epochs: Number of training epochs.
        per_device_train_batch_size: Per-device train batch size.
        gradient_accumulation_steps: Gradient accumulation steps.

    Returns:
        str: a status message for the UI (success or error description).
    """
    try:
        load_env_variables()
        current_dir = os.path.dirname(os.path.abspath(__file__))
        # Compute the config path once; it is read and then rewritten below.
        config_path = os.path.join(current_dir, "transformers_config.json")
        # Explicit UTF-8 so behavior does not depend on the platform locale.
        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)
        # Overlay the UI-selected training parameters.
        config["training"].update({
            "num_train_epochs": num_train_epochs,
            "learning_rate": learning_rate,
            "per_device_train_batch_size": per_device_train_batch_size,
            "gradient_accumulation_steps": gradient_accumulation_steps
        })
        # Push to the user's namespace on the Hub when a username is known.
        if os.environ.get("HF_USERNAME"):
            config["huggingface_hub"].update({
                "hub_model_id": f"{os.environ['HF_USERNAME']}/Phi4-Cognitive-Science"
            })
        with open(config_path, "w", encoding="utf-8") as f:
            json.dump(config, f, indent=4)
        # Daemon thread: training must not block the Gradio event handler;
        # progress is visible in the Space logs.
        thread = threading.Thread(target=run_training_process)
        thread.daemon = True
        thread.start()
        return "Training started! Check the Hugging Face Space logs for progress."
    except Exception as e:
        # Any failure (missing/invalid config, etc.) is reported to the UI
        # rather than raised, so the interface stays responsive.
        return f"Error starting training: {str(e)}"
# --- Gradio UI ---------------------------------------------------------------
# Two tabs: "Training" exposes hyperparameter sliders and a start button;
# "Environment" shows a JSON dump of GPU/env-var availability.
with gr.Blocks(title="Phi-4 Training Interface") as demo:
    gr.Markdown("# Phi-4 Unsupervised Training for Cognitive Science")
    with gr.Tab("Training"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("## Model Configuration")
                gr.Markdown("**Model**: unsloth/phi-4-unsloth-bnb-4bit")
                gr.Markdown("**Dataset**: George-API/cognitive-data")
                gr.Markdown("## Training Parameters")
                # Slider defaults mirror the values in transformers_config.json;
                # start_training overlays whatever the user selects.
                learning_rate = gr.Slider(minimum=1e-6, maximum=1e-4, value=2e-5, step=1e-6,
                                          label="Learning Rate")
                num_train_epochs = gr.Slider(minimum=1, maximum=5, value=3, step=1,
                                             label="Number of Epochs")
                # step=4 keeps batch sizes in multiples of 4 — presumably for
                # Unsloth efficiency; confirm against training script.
                per_device_train_batch_size = gr.Slider(minimum=4, maximum=24, value=12, step=4,
                                                        label="Per Device Train Batch Size (Unsloth Optimized)")
                gradient_accumulation_steps = gr.Slider(minimum=1, maximum=8, value=4, step=1,
                                                        label="Gradient Accumulation Steps")
                start_btn = gr.Button("Start Training", variant="primary")
                # Read-only: filled by start_training's returned status string.
                training_output = gr.Textbox(label="Training Output", interactive=False)
    with gr.Tab("Environment"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("## Environment Information")
                env_info = gr.JSON(label="Environment Info")
                check_env_btn = gr.Button("Check Environment")
    # Set up event handlers
    start_btn.click(
        fn=start_training,
        inputs=[learning_rate, num_train_epochs, per_device_train_batch_size, gradient_accumulation_steps],
        outputs=training_output
    )
    check_env_btn.click(
        fn=check_environment,
        inputs=[],
        outputs=env_info
    )
if __name__ == "__main__":
    # Load credentials/username before serving the UI.
    load_env_variables()
    demo.launch()