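// Package envconfig loads Ollama's configuration from environment variables.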
package envconfig

import (
	"errors"
	"fmt"
	"log/slog"
	"net"
	"os"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
)
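
// OllamaHost is the scheme, host, and port parsed from OLLAMA_HOST.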
type OllamaHost struct {
	Scheme string
	Host   string
	Port   string
}

func (o OllamaHost) String() string {
	return fmt.Sprintf("%s://%s:%s", o.Scheme, o.Host, o.Port)
}
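
// ErrInvalidHostPort is returned by getOllamaHost when OLLAMA_HOST contains
// a port that is not a valid number in [0, 65535].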
var ErrInvalidHostPort = errors.New("invalid port specified in OLLAMA_HOST")

var (
	// AllowOrigins is the set of origins allowed to make requests.
	AllowOrigins []string
	// Debug enables additional debug information.
	Debug bool
	// FlashAttention enables flash attention.
	FlashAttention bool
	// Host is the host and port the server listens on.
	Host *OllamaHost
	// KeepAlive is the duration that models stay loaded in memory.
	KeepAlive string
	// LLMLibrary bypasses autodetection of the LLM library.
	LLMLibrary string
	// MaxRunners is the maximum number of loaded models.
	MaxRunners int
	// MaxQueuedRequests is the maximum number of queued requests.
	MaxQueuedRequests int
	// ModelsDir is the path to the models directory.
	ModelsDir string
	// MaxVRAM is a user-specified maximum VRAM.
	MaxVRAM uint64
	// NoHistory disables readline history.
	NoHistory bool
	// NoPrune disables pruning of model blobs on startup.
	NoPrune bool
	// NumParallel is the maximum number of parallel requests.
	NumParallel int
	// RunnersDir is the location of the llm runners.
	RunnersDir string
	// SchedSpread schedules models across all GPUs.
	SchedSpread bool
	// TmpDir is the location for temporary files.
	TmpDir string
	// IntelGpu enables experimental Intel GPU detection.
	IntelGpu bool

	// GPU device visibility and override settings, passed through as-is.
	CudaVisibleDevices    string
	HipVisibleDevices     string
	RocrVisibleDevices    string
	GpuDeviceOrdinal      string
	HsaOverrideGfxVersion string
)
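
// EnvVar describes a single configuration environment variable: its name,
// current value, and a human-readable description.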
type EnvVar struct {
	Name        string
	Value       any
	Description string
}
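
// AsMap returns every supported environment variable keyed by name. The
// GPU-related variables are omitted on macOS, where they do not apply.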
func AsMap() map[string]EnvVar {
	ret := map[string]EnvVar{
		"OLLAMA_DEBUG":             {"OLLAMA_DEBUG", Debug, "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
		"OLLAMA_FLASH_ATTENTION":   {"OLLAMA_FLASH_ATTENTION", FlashAttention, "Enable flash attention"},
		"OLLAMA_HOST":              {"OLLAMA_HOST", Host, "IP address for the ollama server (default 127.0.0.1:11434)"},
		"OLLAMA_KEEP_ALIVE":        {"OLLAMA_KEEP_ALIVE", KeepAlive, "The duration that models stay loaded in memory (default \"5m\")"},
		"OLLAMA_LLM_LIBRARY":       {"OLLAMA_LLM_LIBRARY", LLMLibrary, "Set LLM library to bypass autodetection"},
		"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners, "Maximum number of loaded models (default 1)"},
		"OLLAMA_MAX_QUEUE":         {"OLLAMA_MAX_QUEUE", MaxQueuedRequests, "Maximum number of queued requests"},
		"OLLAMA_MAX_VRAM":          {"OLLAMA_MAX_VRAM", MaxVRAM, "Maximum VRAM"},
		"OLLAMA_MODELS":            {"OLLAMA_MODELS", ModelsDir, "The path to the models directory"},
		"OLLAMA_NOHISTORY":         {"OLLAMA_NOHISTORY", NoHistory, "Do not preserve readline history"},
		"OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune, "Do not prune model blobs on startup"},
		"OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel, "Maximum number of parallel requests (default 1)"},
		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", AllowOrigins, "A comma separated list of allowed origins"},
		"OLLAMA_RUNNERS_DIR":       {"OLLAMA_RUNNERS_DIR", RunnersDir, "Location for runners"},
		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread, "Always schedule models across all GPUs"},
		"OLLAMA_TMPDIR":            {"OLLAMA_TMPDIR", TmpDir, "Location for temporary files"},
	}
	if runtime.GOOS != "darwin" {
		ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices, "Set which NVIDIA devices are visible"}
		ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices, "Set which AMD devices are visible"}
		ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices, "Set which AMD devices are visible"}
		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal, "Set which AMD devices are visible"}
		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion, "Override the gfx used for all detected AMD GPUs"}
		ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGpu, "Enable experimental Intel GPU detection"}
	}
	return ret
}
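
// Values renders the current value of each variable from AsMap as a string,
// keyed by variable name.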
func Values() map[string]string {
	vals := make(map[string]string)
	for k, v := range AsMap() {
		vals[k] = fmt.Sprintf("%v", v.Value)
	}
	return vals
}
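
// defaultAllowOrigins are hosts that are always allowed as origins, in
// addition to anything listed in OLLAMA_ORIGINS.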
var defaultAllowOrigins = []string{
	"localhost",
	"127.0.0.1",
	"0.0.0.0",
}
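
// clean fetches an environment variable and trims surrounding quotes and
// spaces.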
func clean(key string) string {
	return strings.Trim(os.Getenv(key), "\"' ")
}

func init() {
	// Defaults; LoadConfig overrides these from the environment.
	NumParallel = 1
	MaxRunners = 1
	MaxQueuedRequests = 512

	LoadConfig()
}
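
// LoadConfig populates the package-level settings from the environment.
// Invalid values are logged and the corresponding defaults kept.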
func LoadConfig() {
	if debug := clean("OLLAMA_DEBUG"); debug != "" {
		d, err := strconv.ParseBool(debug)
		if err == nil {
			Debug = d
		} else {
			// Treat any non-boolean, non-empty value as enabling debug.
			Debug = true
		}
	}

	if fa := clean("OLLAMA_FLASH_ATTENTION"); fa != "" {
		d, err := strconv.ParseBool(fa)
		if err == nil {
			FlashAttention = d
		}
	}

	RunnersDir = clean("OLLAMA_RUNNERS_DIR")
	if runtime.GOOS == "windows" && RunnersDir == "" {
		// Search well-known locations relative to the executable and the
		// working directory for the runners payload.
		appExe, err := os.Executable()
		if err != nil {
			slog.Error("failed to lookup executable path", "error", err)
		}

		cwd, err := os.Getwd()
		if err != nil {
			slog.Error("failed to lookup working directory", "error", err)
		}

		var paths []string
		for _, root := range []string{filepath.Dir(appExe), cwd} {
			paths = append(paths,
				root,
				filepath.Join(root, "windows-"+runtime.GOARCH),
				filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
			)
		}

		// Use the first candidate that contains an "ollama_runners" directory.
		for _, p := range paths {
			candidate := filepath.Join(p, "ollama_runners")
			if _, err := os.Stat(candidate); err == nil {
				RunnersDir = candidate
				break
			}
		}
		if RunnersDir == "" {
			slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'")
		}
	}

	TmpDir = clean("OLLAMA_TMPDIR")

	if userLimit := clean("OLLAMA_MAX_VRAM"); userLimit != "" {
		avail, err := strconv.ParseUint(userLimit, 10, 64)
		if err != nil {
			slog.Error("invalid setting, ignoring", "OLLAMA_MAX_VRAM", userLimit, "error", err)
		} else {
			MaxVRAM = avail
		}
	}

	LLMLibrary = clean("OLLAMA_LLM_LIBRARY")

	if onp := clean("OLLAMA_NUM_PARALLEL"); onp != "" {
		val, err := strconv.Atoi(onp)
		if err != nil || val <= 0 {
			slog.Error("invalid setting, must be greater than zero", "OLLAMA_NUM_PARALLEL", onp, "error", err)
		} else {
			NumParallel = val
		}
	}

	if nohistory := clean("OLLAMA_NOHISTORY"); nohistory != "" {
		NoHistory = true
	}

	if spread := clean("OLLAMA_SCHED_SPREAD"); spread != "" {
		s, err := strconv.ParseBool(spread)
		if err == nil {
			SchedSpread = s
		} else {
			SchedSpread = true
		}
	}

	if noprune := clean("OLLAMA_NOPRUNE"); noprune != "" {
		NoPrune = true
	}

	if origins := clean("OLLAMA_ORIGINS"); origins != "" {
		AllowOrigins = strings.Split(origins, ",")
	}
	// The default origins are always allowed, with and without an explicit
	// port, over both http and https.
	for _, allowOrigin := range defaultAllowOrigins {
		AllowOrigins = append(AllowOrigins,
			fmt.Sprintf("http://%s", allowOrigin),
			fmt.Sprintf("https://%s", allowOrigin),
			fmt.Sprintf("http://%s", net.JoinHostPort(allowOrigin, "*")),
			fmt.Sprintf("https://%s", net.JoinHostPort(allowOrigin, "*")),
		)
	}

	AllowOrigins = append(AllowOrigins,
		"app://*",
		"file://*",
		"tauri://*",
	)

	if maxRunners := clean("OLLAMA_MAX_LOADED_MODELS"); maxRunners != "" {
		m, err := strconv.Atoi(maxRunners)
		if err != nil {
			slog.Error("invalid setting", "OLLAMA_MAX_LOADED_MODELS", maxRunners, "error", err)
		} else {
			MaxRunners = m
		}
	}

	if omq := os.Getenv("OLLAMA_MAX_QUEUE"); omq != "" {
		p, err := strconv.Atoi(omq)
		if err != nil || p <= 0 {
			slog.Error("invalid setting", "OLLAMA_MAX_QUEUE", omq, "error", err)
		} else {
			MaxQueuedRequests = p
		}
	}

	KeepAlive = clean("OLLAMA_KEEP_ALIVE")

	var err error
	ModelsDir, err = getModelsDir()
	if err != nil {
		slog.Error("invalid setting", "OLLAMA_MODELS", ModelsDir, "error", err)
	}

	Host, err = getOllamaHost()
	if err != nil {
		slog.Error("invalid setting", "OLLAMA_HOST", Host, "error", err, "using default port", Host.Port)
	}

	if set, err := strconv.ParseBool(clean("OLLAMA_INTEL_GPU")); err == nil {
		IntelGpu = set
	}

	CudaVisibleDevices = clean("CUDA_VISIBLE_DEVICES")
	HipVisibleDevices = clean("HIP_VISIBLE_DEVICES")
	RocrVisibleDevices = clean("ROCR_VISIBLE_DEVICES")
	GpuDeviceOrdinal = clean("GPU_DEVICE_ORDINAL")
	HsaOverrideGfxVersion = clean("HSA_OVERRIDE_GFX_VERSION")
}
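
// getModelsDir returns OLLAMA_MODELS if set, otherwise a "models" directory
// under the user's .ollama home directory.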
func getModelsDir() (string, error) {
	if models, exists := os.LookupEnv("OLLAMA_MODELS"); exists {
		return models, nil
	}
	home, err := os.UserHomeDir()
	if err != nil {
		return "", err
	}
	return filepath.Join(home, ".ollama", "models"), nil
}
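
// getOllamaHost parses OLLAMA_HOST, which may be a bare host, a host:port
// pair, or a full URL. Missing pieces default to http, 127.0.0.1, and a
// scheme-appropriate port (11434 unless the scheme implies 80 or 443).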
func getOllamaHost() (*OllamaHost, error) {
	defaultPort := "11434"

	hostVar := os.Getenv("OLLAMA_HOST")
	hostVar = strings.TrimSpace(strings.Trim(strings.TrimSpace(hostVar), "\"'"))

	scheme, hostport, ok := strings.Cut(hostVar, "://")
	switch {
	case !ok:
		scheme, hostport = "http", hostVar
	case scheme == "http":
		defaultPort = "80"
	case scheme == "https":
		defaultPort = "443"
	}

	// Trim trailing slashes so values like "http://127.0.0.1:11434/" parse.
	hostport = strings.TrimRight(hostport, "/")

	host, port, err := net.SplitHostPort(hostport)
	if err != nil {
		// No port present: keep the bare host (or IP) and use the default port.
		host, port = "127.0.0.1", defaultPort
		if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
			host = ip.String()
		} else if hostport != "" {
			host = hostport
		}
	}

	if portNum, err := strconv.ParseInt(port, 10, 32); err != nil || portNum > 65535 || portNum < 0 {
		return &OllamaHost{
			Scheme: scheme,
			Host:   host,
			Port:   defaultPort,
		}, ErrInvalidHostPort
	}

	return &OllamaHost{
		Scheme: scheme,
		Host:   host,
		Port:   port,
	}, nil
}