#!/bin/bash
#
# EMOLIPS setup script: installs system and Python dependencies, clones the
# SadTalker backbone, fetches its checkpoints, pre-caches the speech emotion
# recognition model, and creates the project directory layout.

set -e
echo "=========================================="
echo "  EMOLIPS Setup - Emotion Lip-Sync MVP"
echo "=========================================="
|
|
echo "[1/6] Installing system dependencies..."
apt-get update -qq && apt-get install -y -qq ffmpeg libsndfile1 > /dev/null 2>&1
echo " ✓ System deps installed"
|
|
echo "[2/6] Installing Python packages..."
# CUDA 11.8 wheels; non-fatal if the index or a matching wheel is unavailable.
pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 2>/dev/null || true
pip install -q \
    transformers \
    librosa \
    soundfile \
    opencv-python-headless \
    mediapipe \
    numpy \
    scipy \
    pillow \
    tqdm \
    gdown \
    pyyaml \
    imageio \
    imageio-ffmpeg \
    scikit-image \
    kornia \
    face-alignment \
    dlib \
    gradio \
    einops \
    safetensors \
    accelerate \
    yacs \
    pydub \
    resampy
echo " ✓ Python packages installed"
|
|
echo "[3/6] Cloning SadTalker backbone..."
if [ ! -d "SadTalker" ]; then
    git clone --depth 1 https://github.com/OpenTalker/SadTalker.git
    cd SadTalker
    pip install -q -r requirements.txt 2>/dev/null || true
    cd ..
fi
echo " ✓ SadTalker ready"
|
|
echo "[4/6] Downloading SadTalker checkpoints..."
cd SadTalker
if [ ! -d "checkpoints" ]; then
    mkdir -p checkpoints

    bash scripts/download_models.sh 2>/dev/null || {
        echo " ⚠ Auto-download failed. Trying gdown..."
        gdown --fuzzy "https://drive.google.com/uc?id=1gwJEawt0Q_7kJXFnhVYQklsb4HGDSM0D" -O checkpoints/ 2>/dev/null || true
        echo " ⚠ If checkpoints are missing, download them manually from the SadTalker GitHub releases"
    }
fi
cd ..
echo " ✓ Checkpoints ready (verify manually if needed)"
|
|
echo "[5/6] Pre-caching emotion recognition model..."
python3 -c "
from transformers import pipeline
print(' Downloading emotion recognition model...')
# device=-1 keeps caching on CPU so this step works on GPU-less machines
classifier = pipeline('audio-classification', model='ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition', device=-1)
print(' ✓ Emotion model cached')
" 2>/dev/null || echo " ⚠ Emotion model will download on first inference run"
|
|
echo "[6/6] Setting up EMOLIPS project structure..."
mkdir -p outputs samples results
echo " ✓ Project directories created"
|
|
| echo "" |
| echo "==========================================" |
| echo " SETUP COMPLETE!" |
| echo "==========================================" |
| echo "" |
| echo "Quick test:" |
| echo " python inference.py --audio sample.wav --image face.jpg" |
| echo "" |
| echo "With emotion override:" |
| echo " python inference.py --audio sample.wav --image face.jpg --emotion happy --intensity 0.8" |
| echo "" |
| echo "Run all emotions:" |
| echo " python inference.py --audio sample.wav --image face.jpg --all-emotions" |
| echo "" |
|
|