# Start llama-server in background
cd /llama.cpp/build
./bin/llama-server --host 0.0.0.0 --port 8080 --model /models/model.q8_0.gguf --ctx-size 32768 &
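# Optional (not part of the original script, shown only as a sketch): capture the
# background server's PID so it can be stopped cleanly when this script exits.
# LLAMA_PID=$!
# trap 'kill "$LLAMA_PID"' EXIT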
# Wait for server to initialize
echo "Waiting for server to start..."
until curl -s "http://localhost:8080/v1/models" >/dev/null; do
    sleep 1
done
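# Note: the loop above waits indefinitely. A bounded variant (an illustrative
# sketch, not part of the original script) could abort if the server never
# becomes reachable, e.g.:
#
#   for i in $(seq 1 120); do
#       curl -s "http://localhost:8080/v1/models" >/dev/null && break
#       [ "$i" -eq 120 ] && { echo "llama-server did not start in time" >&2; exit 1; }
#       sleep 1
#   done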
| echo "Server is ready. Starting Gradio app." | |
| # Start Gradio UI | |
| cd / | |
| python3 app.py |