Spaces:
Running
on
Zero
Running
on
Zero
import sys | |
import os | |
import time | |
import argparse | |
import subprocess | |
import bunny.serve.gradio_web_server as gws | |
# Install the current working directory as an editable package, using the pip
# that belongs to the interpreter running this script. check_call raises
# CalledProcessError when pip exits with a non-zero status, which aborts startup.
subprocess.check_call(
    [sys.executable, '-m', 'pip', 'install', '-e', '.'],
)
def start_controller():
    """Launch ``bunny.serve.controller`` in a child Python process.

    The controller is started with the current interpreter and told to listen
    on ``0.0.0.0:10000``.

    Returns:
        The ``subprocess.Popen`` handle of the spawned process. The caller is
        responsible for terminating it.
    """
    module_invocation = [sys.executable, '-m', 'bunny.serve.controller']
    listen_options = ['--host', '0.0.0.0', '--port', '10000']
    return subprocess.Popen(module_invocation + listen_options)
def start_worker(port: int, model_path: str, model_type: str):
    """Launch ``bunny.serve.model_worker`` in a child Python process.

    Args:
        port: Port passed to the worker as ``--port``; also used to build the
            ``--worker`` address ``http://localhost:<port>``.
        model_path: Value forwarded as ``--model-path``.
        model_type: Value forwarded as ``--model-type``.

    Returns:
        The ``subprocess.Popen`` handle of the spawned process. The caller is
        responsible for terminating it.
    """
    port_text = str(port)
    worker_address = 'http://localhost:' + port_text

    command = [sys.executable, '-m', 'bunny.serve.model_worker']
    command += ['--host', '0.0.0.0']
    # The controller address is fixed to the port start_controller() uses.
    command += ['--controller', 'http://localhost:10000']
    command += ['--port', port_text]
    command += ['--worker', worker_address]
    command += ['--model-path', model_path]
    command += ['--model-type', model_type]
    return subprocess.Popen(command)
if __name__ == '__main__':
    # Script entry point: parse CLI options, start the controller and the
    # model workers as child processes, then serve the Gradio demo in this
    # process. Child processes are always killed before exiting.
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", type=str, default="0.0.0.0")
    parser.add_argument("--port", type=int)
    parser.add_argument("--controller-url", type=str, default="http://localhost:10000")
    parser.add_argument("--concurrency-count", type=int, default=5)
    parser.add_argument("--model-list-mode", type=str, default="reload", choices=["once", "reload"])
    parser.add_argument("--share", action="store_true")
    parser.add_argument("--moderate", action="store_true")
    parser.add_argument("--embed", action="store_true")

    # The parsed options and the model list are stored as attributes on the
    # imported web-server module rather than passed as arguments.
    # NOTE(review): presumably build_demo() reads gws.args / gws.models —
    # confirm against bunny.serve.gradio_web_server.
    gws.args = parser.parse_args()
    gws.models = []

    # (port, model_path, model_type) for each worker to spawn.
    worker_specs = (
        (40000, 'BAAI/Bunny-v1_1-Llama-3-8B-V', 'llama3-8b'),
        (40001, 'BAAI/Bunny-v1_1-4B', 'phi-3'),
        (40002, 'BAAI/Bunny-v1_0-3B', 'phi-2'),
    )

    controller_proc = None
    worker_procs = []
    exit_status = 0
    try:
        # Spawning happens inside the try so that a failure while starting a
        # later process, or an interrupt during the wait below, still reaches
        # the cleanup in `finally` instead of orphaning the children.
        controller_proc = start_controller()
        for port, model_path, model_type in worker_specs:
            worker_procs.append(
                start_worker(port=port, model_path=model_path, model_type=model_type)
            )

        # Fixed 60-second delay before building the UI.
        # NOTE(review): presumably gives the workers time to load and
        # register with the controller — confirm.
        time.sleep(60)

        demo = gws.build_demo(embed_mode=gws.args.embed)
        demo.launch(
            server_name=gws.args.host,
            server_port=gws.args.port,
            share=gws.args.share,
            debug=True,
            max_threads=10
        )
    except Exception as e:
        print(e)
        exit_status = 1
    finally:
        # Workers first, then the controller; either may be missing if
        # startup failed part-way through.
        for worker_proc in worker_procs:
            worker_proc.kill()
        if controller_proc is not None:
            controller_proc.kill()
        # Raising SystemExit here replaces any exception still propagating
        # (e.g. KeyboardInterrupt), so the process exits with exit_status.
        sys.exit(exit_status)