ForgeSight: Live MI300X inference connection update
Browse files
- agents.py +4 -2
- app.py +6 -2
- deploy.ps1 +7 -5
agents.py
CHANGED
|
@@ -25,7 +25,7 @@ AMD_INFERENCE_URL = os.environ.get(
|
|
| 25 |
# Token for the AMD inference server (if required)
|
| 26 |
AMD_INFERENCE_TOKEN = os.environ.get(
|
| 27 |
"AMD_INFERENCE_TOKEN",
|
| 28 |
-
"
|
| 29 |
)
|
| 30 |
|
| 31 |
# The model name vLLM is serving (used in the chat/completions request).
|
|
@@ -188,9 +188,11 @@ async def _call_amd_vllm(
|
|
| 188 |
# Candidate endpoints
|
| 189 |
base_url = AMD_INFERENCE_URL.rstrip("/")
|
| 190 |
candidates = [
|
| 191 |
-
f"{base_url}/v1/chat/completions",
|
| 192 |
f"{base_url}/proxy/8000/v1/chat/completions",
|
|
|
|
| 193 |
f"{base_url}:8000/v1/chat/completions",
|
|
|
|
|
|
|
| 194 |
]
|
| 195 |
|
| 196 |
headers = {}
|
|
|
|
| 25 |
# Token for the AMD inference server (if required)
|
| 26 |
AMD_INFERENCE_TOKEN = os.environ.get(
|
| 27 |
"AMD_INFERENCE_TOKEN",
|
| 28 |
+
"5peRa6unb0DdXvzB3Pbck48IgNTDmxeJSUvE4NdnhvW70FcaX"
|
| 29 |
)
|
| 30 |
|
| 31 |
# The model name vLLM is serving (used in the chat/completions request).
|
|
|
|
| 188 |
# Candidate endpoints
|
| 189 |
base_url = AMD_INFERENCE_URL.rstrip("/")
|
| 190 |
candidates = [
|
|
|
|
| 191 |
f"{base_url}/proxy/8000/v1/chat/completions",
|
| 192 |
+
f"{base_url}/proxy/8001/v1/chat/completions",
|
| 193 |
f"{base_url}:8000/v1/chat/completions",
|
| 194 |
+
f"{base_url}:8001/v1/chat/completions",
|
| 195 |
+
f"{base_url}/v1/chat/completions",
|
| 196 |
]
|
| 197 |
|
| 198 |
headers = {}
|
app.py
CHANGED
|
@@ -201,20 +201,24 @@ async def api_get_telemetry():
|
|
| 201 |
# Candidate endpoints
|
| 202 |
base_url = AMD_INFERENCE_URL.rstrip("/")
|
| 203 |
candidates = [
|
| 204 |
-
f"{base_url}/v1/models",
|
| 205 |
f"{base_url}/proxy/8000/v1/models",
|
|
|
|
| 206 |
f"{base_url}:8000/v1/models",
|
|
|
|
|
|
|
| 207 |
]
|
| 208 |
|
| 209 |
headers = {}
|
| 210 |
if AMD_INFERENCE_TOKEN:
|
|
|
|
| 211 |
headers["Authorization"] = f"token {AMD_INFERENCE_TOKEN}"
|
| 212 |
|
| 213 |
last_err = None
|
| 214 |
success_url = None
|
| 215 |
for url in candidates:
|
| 216 |
try:
|
| 217 |
-
|
|
|
|
| 218 |
test_url = f"{url}?token={AMD_INFERENCE_TOKEN}" if AMD_INFERENCE_TOKEN else url
|
| 219 |
resp = await client.get(test_url, headers=headers)
|
| 220 |
if resp.status_code == 200:
|
|
|
|
| 201 |
# Candidate endpoints
|
| 202 |
base_url = AMD_INFERENCE_URL.rstrip("/")
|
| 203 |
candidates = [
|
|
|
|
| 204 |
f"{base_url}/proxy/8000/v1/models",
|
| 205 |
+
f"{base_url}/proxy/8001/v1/models",
|
| 206 |
f"{base_url}:8000/v1/models",
|
| 207 |
+
f"{base_url}:8001/v1/models",
|
| 208 |
+
f"{base_url}/v1/models",
|
| 209 |
]
|
| 210 |
|
| 211 |
headers = {}
|
| 212 |
if AMD_INFERENCE_TOKEN:
|
| 213 |
+
# Send the token both as an Authorization header and as a ?token= query parameter (appended to the URL below) for compatibility
|
| 214 |
headers["Authorization"] = f"token {AMD_INFERENCE_TOKEN}"
|
| 215 |
|
| 216 |
last_err = None
|
| 217 |
success_url = None
|
| 218 |
for url in candidates:
|
| 219 |
try:
|
| 220 |
+
# Increase timeout to 5s for remote server wake-up
|
| 221 |
+
async with httpx.AsyncClient(timeout=5.0) as client:
|
| 222 |
test_url = f"{url}?token={AMD_INFERENCE_TOKEN}" if AMD_INFERENCE_TOKEN else url
|
| 223 |
resp = await client.get(test_url, headers=headers)
|
| 224 |
if resp.status_code == 200:
|
deploy.ps1
CHANGED
|
@@ -1,16 +1,18 @@
|
|
| 1 |
# Deploy ForgeSight to Hugging Face Spaces
|
| 2 |
# Run this from the project root: c:\Users\user\OneDrive\Desktop\hans\hans
|
| 3 |
|
| 4 |
-
# 1. Clone the HF Space repo
|
| 5 |
-
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
# 2. Copy all deployment files into the cloned repo
|
| 8 |
-
Copy-Item hf_space\* hf_space_repo\ -Force
|
| 9 |
|
| 10 |
# 3. Push to HF Spaces
|
| 11 |
Set-Location hf_space_repo
|
| 12 |
git add -A
|
| 13 |
-
git commit -m "
|
| 14 |
git push
|
| 15 |
|
| 16 |
# After push, the space will build and start at:
|
|
|
|
| 1 |
# Deploy ForgeSight to Hugging Face Spaces
|
| 2 |
# Run this from the project root: c:\Users\user\OneDrive\Desktop\hans\hans
|
| 3 |
|
| 4 |
+
# 1. Clone/Update the HF Space repo
|
| 5 |
+
if (!(Test-Path hf_space_repo)) {
|
| 6 |
+
git clone https://huggingface.co/spaces/lablab-ai-amd-developer-hackathon/ForgeSight hf_space_repo
|
| 7 |
+
}
|
| 8 |
|
| 9 |
+
# 2. Copy all deployment files recursively into the cloned repo
|
| 10 |
+
Copy-Item -Path "hf_space\*" -Destination "hf_space_repo\" -Recurse -Force
|
| 11 |
|
| 12 |
# 3. Push to HF Spaces
|
| 13 |
Set-Location hf_space_repo
|
| 14 |
git add -A
|
| 15 |
+
git commit -m "π ForgeSight: Enhanced AMD MI300X connectivity with Smart Discovery"
|
| 16 |
git push
|
| 17 |
|
| 18 |
# After push, the space will build and start at:
|