"""
Smart startup script for AI Dataset Studio.

Automatically detects available features and chooses the best version to run.
"""
|
|
|
import sys |
|
import logging |
|
import importlib |
|
from typing import Dict, List, Tuple |
|
|
|
|
|
# Configure the root logger once at import time: INFO level, with each record
# rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level logger used by every function in this script.
logger = logging.getLogger(__name__)
|
|
|
def check_import(module_name: str, package_name: str = None) -> Tuple[bool, str]:
    """Check whether a module can be imported.

    Args:
        module_name: Name passed to ``importlib.import_module``.
        package_name: Name of the distribution that provides the module.
            Accepted so callers can pass (module, package) pairs; this
            function does not use it.

    Returns:
        ``(True, "✅ <module_name>")`` when the import succeeds, otherwise
        ``(False, "❌ <module_name>: <error text>")``.
    """
    try:
        importlib.import_module(module_name)
    except Exception as e:
        # A failing import is not always an ImportError: an empty name raises
        # ValueError, and a module may raise anything while it executes.
        # This is a diagnostic probe, so report the failure instead of
        # letting it abort the caller.
        return False, f"❌ {module_name}: {e}"
    return True, f"✅ {module_name}"
|
|
|
def diagnose_system() -> Dict[str, bool]:
    """Probe essential and optional dependencies and summarise the result.

    Each dependency is probed with ``check_import`` and the returned message
    is logged.

    Returns:
        A dict with these keys:

        * ``essential_available``: True when every essential module imports.
        * ``ai_models_available``: True only when both ``transformers`` and
          ``torch`` import.
        * ``nlp_available``: True when ``nltk`` imports.
        * ``datasets_available``: True when ``datasets`` imports.
        * ``missing_essential`` / ``missing_optional``: lists of package
          names (not booleans, despite the return annotation) whose module
          failed to import.
    """
    # NOTE(review): the leading "π" glyphs in these messages look like
    # mis-decoded emoji; left as found -- confirm against the original file.
    logger.info("π Diagnosing system capabilities...")

    # (importable module name, installable package name)
    essential_deps = [
        ('gradio', 'gradio'),
        ('requests', 'requests'),
        ('bs4', 'beautifulsoup4'),
        ('pandas', 'pandas'),
        ('numpy', 'numpy'),
    ]

    optional_deps = [
        ('transformers', 'transformers'),
        ('torch', 'torch'),
        ('datasets', 'datasets'),
        ('nltk', 'nltk'),
        ('sentence_transformers', 'sentence-transformers'),
    ]

    results = {
        'essential_available': True,
        'ai_models_available': False,
        'nlp_available': False,
        'datasets_available': False,
        'missing_essential': [],
        'missing_optional': [],
    }

    logger.info("π Checking essential dependencies...")
    for module, package in essential_deps:
        available, msg = check_import(module, package)
        logger.info(f" {msg}")
        if not available:
            results['essential_available'] = False
            results['missing_essential'].append(package)

    logger.info("π Checking optional dependencies...")
    optional_found = set()
    for module, package in optional_deps:
        available, msg = check_import(module, package)
        logger.info(f" {msg}")
        if available:
            optional_found.add(module)
        else:
            results['missing_optional'].append(package)

    # The AI features are advertised as needing "transformers + torch", so
    # one of the two on its own must not switch the flag on.
    results['ai_models_available'] = {'transformers', 'torch'} <= optional_found
    results['nlp_available'] = 'nltk' in optional_found
    results['datasets_available'] = 'datasets' in optional_found

    return results
|
|
|
def test_gpu_availability() -> bool:
    """Report whether PyTorch sees a CUDA device, logging the outcome.

    Returns:
        The value of ``torch.cuda.is_available()``, or False when an
        ImportError is raised (PyTorch not installed).
    """
    try:
        import torch

        has_cuda = torch.cuda.is_available()
        if not has_cuda:
            logger.info("π» Using CPU (GPU not available)")
            return has_cuda

        # Only device index 0 is reported.
        device_label = torch.cuda.get_device_name(0)
        logger.info(f"π GPU available: {device_label}")
        return has_cuda
    except ImportError:
        logger.info("π» Using CPU (PyTorch not available)")
        return False
|
|
|
def install_missing_packages(packages: List[str]) -> bool:
    """Attempt to install missing packages with pip.

    Runs ``<current interpreter> -m pip install <packages...>`` and captures
    its output.

    Args:
        packages: Package names to install. An empty list is a no-op.

    Returns:
        True when there was nothing to install or pip exited with code 0;
        False when pip exited non-zero or could not be run at all.
    """
    if not packages:
        return True

    # NOTE(review): the "π¦" prefix looks like a mis-decoded emoji; left as
    # found -- confirm against the original file.
    logger.info(f"π¦ Attempting to install missing packages: {', '.join(packages)}")

    try:
        import subprocess

        # Use the running interpreter so packages land in its environment.
        cmd = [sys.executable, "-m", "pip", "install"] + packages
        result = subprocess.run(cmd, capture_output=True, text=True)
    except Exception as e:
        logger.error(f"❌ Installation error: {e}")
        return False

    if result.returncode != 0:
        logger.error(f"❌ Installation failed: {result.stderr}")
        return False

    # Modules installed while the program is running may be missed by the
    # import system's cached finders; invalidate them so a later import in
    # this same process can find the new packages.
    importlib.invalidate_caches()
    logger.info("✅ Packages installed successfully!")
    return True
|
|
|
def run_full_version():
    """Run the full-featured version by importing the ``app`` module.

    What ``app`` does at import time is not visible from this script.

    Raises:
        Exception: Whatever importing ``app`` raised, re-raised after being
            logged so the caller can fall back.
    """
    # NOTE(review): the "π" prefix looks like a mis-decoded emoji; left as
    # found -- confirm against the original file.
    logger.info("π Starting full AI Dataset Studio...")
    try:
        import app  # noqa: F401 -- imported for its side effects
    except Exception as e:
        logger.error(f"❌ Full version failed: {e}")
        raise
    logger.info("✅ Full version loaded successfully")
|
|
|
def run_minimal_version():
    """Run the minimal version by importing the ``app_minimal`` module.

    What ``app_minimal`` does at import time is not visible from this script.

    Raises:
        Exception: Whatever importing ``app_minimal`` raised, re-raised after
            being logged.
    """
    # NOTE(review): the "π" prefix looks like a mis-decoded emoji; left as
    # found -- confirm against the original file.
    logger.info("π Starting minimal AI Dataset Studio...")
    try:
        import app_minimal  # noqa: F401 -- imported for its side effects
    except Exception as e:
        logger.error(f"❌ Minimal version failed: {e}")
        raise
    logger.info("✅ Minimal version loaded successfully")
|
|
|
def show_feature_summary(results: Dict[str, bool]):
    """Log a summary of which feature groups are available.

    Args:
        results: Capability flags. The keys read are
            ``essential_available``, ``ai_models_available``,
            ``nlp_available`` and ``datasets_available``.
    """
    # NOTE(review): the "π" prefix looks like a mis-decoded emoji; left as
    # found -- confirm against the original file.
    logger.info("π Feature Summary:")

    if results['essential_available']:
        logger.info(" ✅ Core web scraping and data processing")
        logger.info(" ✅ CSV and JSON export")
        logger.info(" ✅ Quality filtering and text cleaning")

    if results['ai_models_available']:
        logger.info(" ✅ AI-powered sentiment analysis")
        logger.info(" ✅ Named entity recognition")
        logger.info(" ✅ Advanced content quality assessment")
    else:
        logger.info(" ⚠️ AI features disabled (install transformers + torch)")

    if results['nlp_available']:
        logger.info(" ✅ Advanced text processing with NLTK")
    else:
        logger.info(" ⚠️ Basic text processing only (install nltk)")

    if results['datasets_available']:
        logger.info(" ✅ HuggingFace Datasets export")
    else:
        logger.info(" ⚠️ Standard export only (install datasets)")
|
|
|
def main():
    """Main startup function.

    Diagnoses the environment, logs the feature summary and GPU status, and
    then:

    * if essential dependencies are missing, offers to install them and
      re-runs the diagnosis on success (exits with status 1 if the offer is
      declined or the installation fails);
    * runs the full version when AI models are available, falling back to
      the minimal version if the full one raises;
    * otherwise runs the minimal version.

    Exits with status 1 when essential dependencies are still missing.
    """
    # NOTE(review): the "π…" / "β" prefixes in these messages look like
    # mis-decoded emoji; left as found -- confirm against the original file.
    print("π AI Dataset Studio - Smart Startup")
    print("=" * 50)

    results = diagnose_system()
    show_feature_summary(results)

    # Called for its log output only; the return value is not needed here.
    test_gpu_availability()

    print("\n" + "=" * 50)

    if not results['essential_available']:
        logger.error("β Essential dependencies missing!")
        logger.error("π‘ Please install required packages:")
        # List what is actually missing instead of a fixed set of packages.
        logger.error(f" pip install {' '.join(results['missing_essential'])}")

        try:
            user_input = input("\nπ€ Try to install missing packages automatically? (y/n): ")
        except EOFError:
            # No interactive stdin to answer the prompt: treat as "no".
            user_input = ""

        if user_input.strip().lower() not in ('y', 'yes'):
            sys.exit(1)

        if not install_missing_packages(results['missing_essential']):
            logger.error("β Automatic installation failed")
            sys.exit(1)

        logger.info("π Restarting with new packages...")
        results = diagnose_system()

    if not results['essential_available']:
        logger.error("β Cannot start - essential dependencies missing")
        sys.exit(1)

    if not results['ai_models_available']:
        logger.info("π― Running minimal version (AI features not available)")
        run_minimal_version()
        return

    logger.info("π― Running full-featured version with AI capabilities")
    try:
        run_full_version()
    except Exception as e:
        logger.error(f"β Full version failed, falling back to minimal: {e}")
        run_minimal_version()
|
|
|
if __name__ == "__main__":
    # Script entry point: run main() and translate its failure modes into a
    # process exit status (0 for Ctrl-C, 1 for any other exception).
    exit_status = None
    try:
        main()
    except KeyboardInterrupt:
        logger.info("\nπ Startup cancelled by user")
        exit_status = 0
    except Exception as startup_error:
        logger.error(f"β Startup failed: {startup_error}")
        logger.error("π‘ Try running directly: python app_minimal.py")
        exit_status = 1

    if exit_status is not None:
        sys.exit(exit_status)