ZackBradshaw commited on
Commit
fd34675
1 Parent(s): 72ee349

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +2 -8
  2. app.py +73 -0
  3. requirements.txt +9 -0
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: Vllm Gradio
3
- emoji: 🐨
4
- colorFrom: indigo
5
- colorTo: yellow
6
  sdk: gradio
7
  sdk_version: 4.19.2
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: vllm-gradio
3
+ app_file: app.py
 
 
4
  sdk: gradio
5
  sdk_version: 4.19.2
 
 
6
  ---
 
 
app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import sky
4
+
5
def deploy_vllm_on_sky(model_path, gpu_type, cpus, memory, cloud_provider, region, disk_size, disk_type):
    """Launch a vLLM API server on a cloud VM provisioned through SkyPilot.

    Args:
        model_path: HF model id or local path passed to vLLM (e.g. "EleutherAI/gpt-neo-2.7B").
        gpu_type: accelerator name (e.g. "V100"); one GPU is requested.
        cpus: number of vCPUs for the instance.
        memory: instance memory in GB.
        cloud_provider: one of "AWS", "GCP", "Azure".
        region: cloud region string (e.g. "us-west-2").
        disk_size: boot disk size in GB.
        disk_type: "standard" or "ssd" (mapped to a SkyPilot disk tier).

    Returns:
        A status message naming the launched cluster.
    """
    # NOTE(review): the original called sky.Cloud(...), sky.Disk(...) and
    # sky.Cluster(...), none of which exist in the SkyPilot API. Resources
    # (cloud, region, disk) are declared on sky.Resources, and the cluster
    # is named via sky.launch(..., cluster_name=...).
    clouds = {"AWS": sky.AWS(), "GCP": sky.GCP(), "Azure": sky.Azure()}
    if cloud_provider not in clouds:
        raise ValueError(f"Unsupported cloud provider: {cloud_provider}")

    task = sky.Task(
        name="vllm_serving",
        setup="pip install vllm",
        # vLLM's standalone HTTP server is started with --model, not the
        # nonexistent --model_name_or_path flag the original used.
        run=f"python -m vllm.entrypoints.api_server --model {model_path} --port 8080",
        envs={"MODEL_PATH": model_path},
        workdir=".",
    )

    task.set_resources(
        sky.Resources(
            cloud=clouds[cloud_provider],
            region=region,
            accelerators=f"{gpu_type}:1",
            cpus=cpus,
            memory=memory,
            disk_size=disk_size,
            # Resources has no "disk type" field; approximate via disk_tier.
            # TODO confirm tier mapping against the deployed SkyPilot version.
            disk_tier="high" if disk_type == "ssd" else "low",
            # Expose the serving port so vllm_inference() can reach it.
            ports=8080,
        )
    )

    cluster_name = "vllm-cluster"
    sky.launch(task, cluster_name=cluster_name)
    return f"VLLM model deployed on SkyPilot with cluster name: {cluster_name}"
32
+
33
def vllm_inference(prompt, cluster_name):
    """Send *prompt* to the vLLM server running on the named SkyPilot cluster.

    Args:
        prompt: text prompt to complete.
        cluster_name: name of the SkyPilot cluster hosting the server.

    Returns:
        The generated text (first completion).

    Raises:
        ValueError: if no cluster with that name is known to SkyPilot.
        requests.HTTPError: if the server answers with an error status.
    """
    # NOTE(review): sky.get_cluster_ip() does not exist in SkyPilot; the
    # head IP comes from the cluster records returned by sky.status().
    records = sky.status(cluster_names=[cluster_name])
    if not records:
        raise ValueError(f"No SkyPilot cluster named {cluster_name!r} found")
    # Each record's "handle" carries the head node IP — TODO confirm field
    # name against the installed SkyPilot version.
    cluster_ip = records[0]["handle"].head_ip

    # vLLM's api_server exposes POST /generate taking {"prompt": ...} and
    # returning {"text": [...]}; the original {"inputs"}/["outputs"] schema
    # matches no vLLM endpoint.
    response = requests.post(
        f"http://{cluster_ip}:8080/generate",
        json={"prompt": prompt},
        timeout=60,
    )
    response.raise_for_status()
    return response.json()["text"][0]
38
+
39
# UI wiring: two Gradio Interfaces (inference + deployment) rendered side by
# side in a Blocks layout when the module is run as a script. The gr.Blocks /
# .render() / Dropdown(value=...) API requires Gradio 4.x, matching the
# README's sdk_version: 4.19.2.
# NOTE(review): requirements.txt pins gradio==2.3.7, which predates this API —
# the pin looks wrong; verify against the Space's actual runtime.

# Text-generation panel: forwards a prompt to the cluster named by the user.
vllm_inference_interface = gr.Interface(
    fn=vllm_inference,
    inputs=[
        gr.Textbox(lines=5, label="Input Prompt"),
        gr.Textbox(label="Cluster Name", placeholder="Enter the cluster name where VLLM is deployed")
    ],
    outputs="text",
    title="VLLM Inference",
    description="Enter a prompt to generate text using VLLM served on a SkyPilot-managed cloud instance."
)

# Deployment panel: collects instance sizing/cloud options and passes them
# positionally to deploy_vllm_on_sky (input order must match its signature).
sky_pilot_interface = gr.Interface(
    fn=deploy_vllm_on_sky,
    inputs=[
        gr.Textbox(label="Model Path", placeholder="EleutherAI/gpt-neo-2.7B"),
        gr.Dropdown(label="GPU Type", choices=["V100", "P100", "T4"], value="V100"),
        gr.Slider(label="CPUs", minimum=1, maximum=16, value=4),
        gr.Slider(label="Memory (GB)", minimum=4, maximum=64, value=16),
        gr.Dropdown(label="Cloud Provider", choices=["AWS", "GCP", "Azure"], value="AWS"),
        gr.Textbox(label="Region", placeholder="us-west-2"),
        gr.Slider(label="Disk Size (GB)", minimum=20, maximum=1000, value=100),
        gr.Dropdown(label="Disk Type", choices=["standard", "ssd"], value="ssd")
    ],
    outputs="text",
    title="Deploy VLLM on SkyPilot",
    description="Configure and deploy a VLLM model on a SkyPilot-managed cloud instance with full parameter customization."
)
if __name__ == "__main__":
    # Compose both panels into a single two-column page and start the server.
    with gr.Blocks() as app:
        with gr.Row():
            with gr.Column():
                vllm_inference_interface.render()
            with gr.Column():
                sky_pilot_interface.render()
    app.launch()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ gradio==4.19.2
2
+ gradio_client==0.10.1
3
+ python-dateutil==2.9.0.post0
4
+ requests==2.25.1
5
+ requests-file==2.0.0
6
+ requests-oauthlib==1.3.1
7
+ six==1.16.0
8
+ skypilot==0.4.1
9
+ types-python-dateutil==2.8.19.20240106