// AGI / cpp / model_manager.h
// Author: Dmitry Beresnev
// Commit 332826f: Refactors the C++ LLM manager into modular components,
// moves the Python modules under python/, and keeps the current
// control-plane behavior intact. The C++ server now has a clearer
// separation of concerns — config, model lifecycle, runtime services,
// request parsing, HTTP helpers, and server routing — and the Docker
// build/runtime paths were updated to compile multiple C++ files and to
// load Python code from the new package folder.
#pragma once

#include <sys/types.h>  // pid_t

#include <mutex>
#include <optional>
#include <string>
#include <utility>

#include "llm_manager_types.h"
/// Owns the lifecycle of llama-server worker processes: spawning a worker
/// for a model, probing it until ready, hot-switching between models, and
/// restarting the active worker.
///
/// Thread-safety: a single mutex (mu_) guards the mutable state; public
/// methods are presumed to lock it — NOTE(review): confirm in the .cpp.
class ModelManager {
public:
    /// Copies the relevant settings out of @p config; does not spawn
    /// anything yet (spawning is presumed to happen in initialize_default —
    /// confirm in the .cpp).
    explicit ModelManager(const ManagerConfig &config);

    /// Brings up the worker for the configured default model.
    /// @param error  Filled with a human-readable message on failure.
    /// @return true on success.
    bool initialize_default(std::string &error);

    /// Replaces the active worker with one serving @p model.
    /// @param error  Filled with a human-readable message on failure.
    /// @return true on success.
    bool switch_model(const std::string &model, std::string &error);

    /// Restarts the currently active worker (same model, fresh process).
    /// @param error  Filled with a human-readable message on failure.
    /// @return true on success.
    bool restart_active(std::string &error);

    /// Snapshot of the active worker, or std::nullopt if none is running.
    [[nodiscard]] std::optional<WorkerInfo> active_worker();

    /// JSON view of the known/available models (shape defined in the .cpp).
    [[nodiscard]] json models_view();

private:
    std::mutex mu_;                    // guards all mutable state below
    std::optional<WorkerInfo> active_; // currently serving worker, if any
    bool switch_in_progress_ = false;  // prevents concurrent switches

    // Configuration copied from ManagerConfig at construction.
    std::string default_model_;
    std::string llama_server_bin_;
    std::string worker_host_;
    std::string worker_bind_host_;
    // Brace-initialized so a missed ctor init-list entry cannot leave
    // an indeterminate value (in-class defaults are overridden by the
    // ctor's init list, so this is behavior-compatible).
    int base_port_{0};
    int switch_timeout_sec_{0};
    int n_ctx_{0};
    int n_threads_{0};
    int n_gpu_layers_{0};
    int n_batch_{0};
    int n_ubatch_{0};
    int next_port_{0};

    // Returns the next port to hand to a freshly spawned worker.
    int allocate_port();
    // Clears switch_in_progress_ and finalizes bookkeeping for a switch.
    void finish_switch(bool ok);
    // Forks/execs a llama-server process for @p model on @p port.
    pid_t spawn_worker(const std::string &model, int port);
    // Polls the worker until it answers or @p timeout_sec elapses;
    // also presumed to watch @p pid for early exit — confirm in the .cpp.
    bool wait_until_ready(pid_t pid, int port, int timeout_sec);
    // Minimal HTTP GET against a local worker: {status code, body}.
    std::pair<int, std::string> http_get(int port, const std::string &target);
};
/// True if the process @p pid still exists (presumably via kill(pid, 0)
/// or waitpid — confirm in the .cpp). Pure query, so the result must
/// not be ignored.
[[nodiscard]] bool is_alive(pid_t pid);

/// Asks the worker process to terminate and waits up to @p wait_seconds
/// before (presumably) escalating to a harder kill — confirm in the .cpp.
void shutdown_worker(pid_t pid, int wait_seconds = 15);