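"""Launcher for the LiteLLM proxy server.

Defines flat configuration defaults, optionally starts a local `ollama serve`
process when an Ollama model is configured without an api_base, persists the
worker configuration via save_worker_config, and serves the app with uvicorn.
"""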
from proxy_server import app, save_worker_config
import uvicorn
import random
import socket
import subprocess
import json
import os

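# Flat configuration defaults. These presumably mirror the command-line flags
# of the LiteLLM proxy CLI (an assumption); here they are hard-coded globals.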
host = "0.0.0.0"
port = 8000
api_base = None
api_version = "2023-07-01-preview"
model = None
alias = None
add_key = None
headers = None
save = False
debug = False
detailed_debug = False
temperature = 0.0
max_tokens = 1000
request_timeout = 10
drop_params = True
add_function_to_prompt = True
config = None
max_budget = 100
telemetry = False
test = False
local = False
num_workers = 1
test_async = False
num_requests = 1
use_queue = False
health = False
version = False


def run_ollama_serve():
    # Start `ollama serve` in the background, discarding its output. The
    # Popen handle is deliberately not kept; the process simply keeps
    # running alongside the proxy.
    try:
        command = ["ollama", "serve"]

        with open(os.devnull, "w") as devnull:
            subprocess.Popen(command, stdout=devnull, stderr=devnull)
    except Exception as e:
        print(
            f"""
            LiteLLM Warning: proxy started with an `ollama` model.
            `ollama serve` failed with exception: {e}
            Ensure you run `ollama serve` yourself.
            """
        )

def is_port_in_use(port):
    # connect_ex returns 0 when the connection succeeds, i.e. when
    # something is already listening on the port.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        return s.connect_ex(("localhost", port)) == 0

if model and "ollama" in model and api_base is None:
    # An Ollama model with no explicit api_base: start a local server.
    run_ollama_serve()
else:
    if headers:
        # Headers may be supplied as a JSON string; decode them to a dict.
        headers = json.loads(headers)
    save_worker_config(
        model=model,
        alias=alias,
        api_base=api_base,
        api_version=api_version,
        debug=debug,
        detailed_debug=detailed_debug,
        temperature=temperature,
        max_tokens=max_tokens,
        request_timeout=request_timeout,
        max_budget=max_budget,
        telemetry=telemetry,
        drop_params=drop_params,
        add_function_to_prompt=add_function_to_prompt,
        headers=headers,
        save=save,
        config=config,
        use_queue=use_queue,
    )

# If the default port is already taken, fall back to a random high port.
if port == 8000 and is_port_in_use(port):
    port = random.randint(1024, 49152)


if __name__ == "__main__":
    uvicorn.run(app, host=host, port=port)
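
# A minimal sketch (not part of the original file) of a multi-worker launch:
# uvicorn only honours `workers` when the app is passed as an import string,
# so the hard-coded num_workers above would be used like this:
#
#     uvicorn.run("proxy_server:app", host=host, port=port, workers=num_workers)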