Spaces:
Running
Running
Dmitry Beresnev
Refactors the C++ LLM manager into modular components, moves the Python modules under python/, and keeps the current control-plane behavior intact. The C++ server now has a clearer separation of concerns for configuration, model lifecycle, runtime services, request parsing, HTTP helpers, and server routing, while the Docker build/runtime paths were updated to compile multiple C++ source files and to load Python code from the new package folder.
332826f | class ModelManager { | |
| public: | |
| explicit ModelManager(const ManagerConfig &config); | |
| bool initialize_default(std::string &error); | |
| bool switch_model(const std::string &model, std::string &error); | |
| bool restart_active(std::string &error); | |
| std::optional<WorkerInfo> active_worker(); | |
| json models_view(); | |
| private: | |
| std::mutex mu_; | |
| std::optional<WorkerInfo> active_; | |
| bool switch_in_progress_ = false; | |
| std::string default_model_; | |
| std::string llama_server_bin_; | |
| std::string worker_host_; | |
| std::string worker_bind_host_; | |
| int base_port_; | |
| int switch_timeout_sec_; | |
| int n_ctx_; | |
| int n_threads_; | |
| int n_gpu_layers_; | |
| int n_batch_; | |
| int n_ubatch_; | |
| int next_port_; | |
| int allocate_port(); | |
| void finish_switch(bool ok); | |
| pid_t spawn_worker(const std::string &model, int port); | |
| bool wait_until_ready(pid_t pid, int port, int timeout_sec); | |
| std::pair<int, std::string> http_get(int port, const std::string &target); | |
| }; | |
// Returns true if process `pid` is still running.
// NOTE(review): implementation not visible here — presumably a kill(pid, 0)
// or waitpid(WNOHANG) probe; confirm in the .cpp.
bool is_alive(pid_t pid);

// Stops a worker process, waiting up to `wait_seconds` (default 15) for it
// to exit before escalating.
// NOTE(review): presumably SIGTERM then SIGKILL after the grace period —
// bodies not visible in this chunk; confirm escalation behavior.
void shutdown_worker(pid_t pid, int wait_seconds = 15);