ZackBradshaw commited on
Commit
fd34675
1 Parent(s): 72ee349

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +2 -8
  2. app.py +73 -0
  3. requirements.txt +9 -0
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: Vllm Gradio
3
- emoji: 🐨
4
- colorFrom: indigo
5
- colorTo: yellow
6
  sdk: gradio
7
  sdk_version: 4.19.2
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: vllm-gradio
3
+ app_file: app.py
 
 
4
  sdk: gradio
5
  sdk_version: 4.19.2
 
 
6
  ---
 
 
app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import sky
4
+
5
def deploy_vllm_on_sky(model_path, gpu_type, cpus, memory, cloud_provider, region, disk_size, disk_type):
    """Launch a vLLM API server on a cloud VM provisioned through SkyPilot.

    Args:
        model_path: HF model id or local path passed to vLLM (e.g. "EleutherAI/gpt-neo-2.7B").
        gpu_type: accelerator name (e.g. "V100"); one GPU is requested.
        cpus: number of vCPUs for the instance.
        memory: instance memory in GB.
        cloud_provider: one of "AWS", "GCP", "Azure".
        region: cloud region string (e.g. "us-west-2").
        disk_size: boot disk size in GB.
        disk_type: "standard" or "ssd" (mapped to a SkyPilot disk tier).

    Returns:
        A status message naming the launched cluster.
    """
    # NOTE(review): the original called sky.Cloud(...), sky.Disk(...) and
    # sky.Cluster(...), none of which exist in the SkyPilot API. Resources
    # (cloud, region, disk) are declared on sky.Resources, and the cluster
    # is named via sky.launch(..., cluster_name=...).
    clouds = {"AWS": sky.AWS(), "GCP": sky.GCP(), "Azure": sky.Azure()}
    if cloud_provider not in clouds:
        raise ValueError(f"Unsupported cloud provider: {cloud_provider}")

    task = sky.Task(
        name="vllm_serving",
        setup="pip install vllm",
        # vLLM's standalone HTTP server is started with --model, not the
        # nonexistent --model_name_or_path flag the original used.
        run=f"python -m vllm.entrypoints.api_server --model {model_path} --port 8080",
        envs={"MODEL_PATH": model_path},
        workdir=".",
    )

    task.set_resources(
        sky.Resources(
            cloud=clouds[cloud_provider],
            region=region,
            accelerators=f"{gpu_type}:1",
            cpus=cpus,
            memory=memory,
            disk_size=disk_size,
            # Resources has no "disk type" field; approximate via disk_tier.
            # TODO confirm tier mapping against the deployed SkyPilot version.
            disk_tier="high" if disk_type == "ssd" else "low",
            # Expose the serving port so vllm_inference() can reach it.
            ports=8080,
        )
    )

    cluster_name = "vllm-cluster"
    sky.launch(task, cluster_name=cluster_name)
    return f"VLLM model deployed on SkyPilot with cluster name: {cluster_name}"
32
+
33
def vllm_inference(prompt, cluster_name):
    """Send *prompt* to the vLLM server running on the named SkyPilot cluster.

    Args:
        prompt: text prompt to complete.
        cluster_name: name of the SkyPilot cluster hosting the server.

    Returns:
        The generated text (first completion).

    Raises:
        ValueError: if no cluster with that name is known to SkyPilot.
        requests.HTTPError: if the server answers with an error status.
    """
    # NOTE(review): sky.get_cluster_ip() does not exist in SkyPilot; the
    # head IP comes from the cluster records returned by sky.status().
    records = sky.status(cluster_names=[cluster_name])
    if not records:
        raise ValueError(f"No SkyPilot cluster named {cluster_name!r} found")
    # Each record's "handle" carries the head node IP — TODO confirm field
    # name against the installed SkyPilot version.
    cluster_ip = records[0]["handle"].head_ip

    # vLLM's api_server exposes POST /generate taking {"prompt": ...} and
    # returning {"text": [...]}; the original {"inputs"}/["outputs"] schema
    # matches no vLLM endpoint.
    response = requests.post(
        f"http://{cluster_ip}:8080/generate",
        json={"prompt": prompt},
        timeout=60,
    )
    response.raise_for_status()
    return response.json()["text"][0]
38
+
39
# UI wiring: two Gradio Interfaces (inference + deployment) rendered side by
# side in a Blocks layout when the module is run as a script. The gr.Blocks /
# .render() / Dropdown(value=...) API requires Gradio 4.x, matching the
# README's sdk_version: 4.19.2.
# NOTE(review): requirements.txt pins gradio==2.3.7, which predates this API —
# the pin looks wrong; verify against the Space's actual runtime.

# Text-generation panel: forwards a prompt to the cluster named by the user.
vllm_inference_interface = gr.Interface(
    fn=vllm_inference,
    inputs=[
        gr.Textbox(lines=5, label="Input Prompt"),
        gr.Textbox(label="Cluster Name", placeholder="Enter the cluster name where VLLM is deployed")
    ],
    outputs="text",
    title="VLLM Inference",
    description="Enter a prompt to generate text using VLLM served on a SkyPilot-managed cloud instance."
)

# Deployment panel: collects instance sizing/cloud options and passes them
# positionally to deploy_vllm_on_sky (input order must match its signature).
sky_pilot_interface = gr.Interface(
    fn=deploy_vllm_on_sky,
    inputs=[
        gr.Textbox(label="Model Path", placeholder="EleutherAI/gpt-neo-2.7B"),
        gr.Dropdown(label="GPU Type", choices=["V100", "P100", "T4"], value="V100"),
        gr.Slider(label="CPUs", minimum=1, maximum=16, value=4),
        gr.Slider(label="Memory (GB)", minimum=4, maximum=64, value=16),
        gr.Dropdown(label="Cloud Provider", choices=["AWS", "GCP", "Azure"], value="AWS"),
        gr.Textbox(label="Region", placeholder="us-west-2"),
        gr.Slider(label="Disk Size (GB)", minimum=20, maximum=1000, value=100),
        gr.Dropdown(label="Disk Type", choices=["standard", "ssd"], value="ssd")
    ],
    outputs="text",
    title="Deploy VLLM on SkyPilot",
    description="Configure and deploy a VLLM model on a SkyPilot-managed cloud instance with full parameter customization."
)
if __name__ == "__main__":
    # Compose both panels into a single two-column page and start the server.
    with gr.Blocks() as app:
        with gr.Row():
            with gr.Column():
                vllm_inference_interface.render()
            with gr.Column():
                sky_pilot_interface.render()
    app.launch()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ gradio==4.19.2
2
+ gradio_client==0.10.1
3
+ python-dateutil==2.9.0.post0
4
+ requests==2.25.1
5
+ requests-file==2.0.0
6
+ requests-oauthlib==1.3.1
7
+ six==1.16.0
8
+ skypilot==0.4.1
9
+ types-python-dateutil==2.8.19.20240106