Spaces:
Running
Running
Dmitry Beresnev
Refactors the C++ LLM manager into modular components, moves the Python modules under python/, and keeps the current control-plane behavior intact. The C++ server now has a clearer separation of concerns for configuration, model lifecycle, runtime services, request parsing, HTTP helpers, and server routing, while the Docker build/runtime paths were updated to compile multiple C++ source files and to load Python code from the new package folder.
332826f | class ModelManager { | |
| public: | |
| explicit ModelManager(const ManagerConfig &config); | |
| bool initialize_default(std::string &error); | |
| bool switch_model(const std::string &model, std::string &error); | |
| bool restart_active(std::string &error); | |
| std::optional<WorkerInfo> active_worker(); | |
| json models_view(); | |
| private: | |
| std::mutex mu_; | |
| std::optional<WorkerInfo> active_; | |
| bool switch_in_progress_ = false; | |
| std::string default_model_; | |
| std::string llama_server_bin_; | |
| std::string worker_host_; | |
| std::string worker_bind_host_; | |
| int base_port_; | |
| int switch_timeout_sec_; | |
| int n_ctx_; | |
| int n_threads_; | |
| int n_gpu_layers_; | |
| int n_batch_; | |
| int n_ubatch_; | |
| int next_port_; | |
| int allocate_port(); | |
| void finish_switch(bool ok); | |
| pid_t spawn_worker(const std::string &model, int port); | |
| bool wait_until_ready(pid_t pid, int port, int timeout_sec); | |
| std::pair<int, std::string> http_get(int port, const std::string &target); | |
| }; | |
// Returns true if process `pid` is still running.
// NOTE(review): implementation not visible here — presumably a kill(pid, 0)
// or waitpid(WNOHANG) probe; confirm in the .cpp.
bool is_alive(pid_t pid);

// Stops a worker process, waiting up to `wait_seconds` (default 15) for it
// to exit before escalating.
// NOTE(review): presumably SIGTERM then SIGKILL after the grace period —
// bodies not visible in this chunk; confirm escalation behavior.
void shutdown_worker(pid_t pid, int wait_seconds = 15);