rasAli02 committed on
Commit
9505f49
·
1 Parent(s): f89b145

🚀 ForgeSight: Live MI300X inference connection update

Browse files
Files changed (3) hide show
  1. agents.py +4 -2
  2. app.py +6 -2
  3. deploy.ps1 +7 -5
agents.py CHANGED
@@ -25,7 +25,7 @@ AMD_INFERENCE_URL = os.environ.get(
25
  # Token for the AMD inference server (if required)
26
  AMD_INFERENCE_TOKEN = os.environ.get(
27
  "AMD_INFERENCE_TOKEN",
28
- "DiPipPSZoxb96rcrP7X+B0N5mTTEzxU/ziesgI/Z2NPo9xPKM"
29
  )
30
 
31
  # The model name vLLM is serving (used in the chat/completions request).
@@ -188,9 +188,11 @@ async def _call_amd_vllm(
188
  # Candidate endpoints
189
  base_url = AMD_INFERENCE_URL.rstrip("/")
190
  candidates = [
191
- f"{base_url}/v1/chat/completions",
192
  f"{base_url}/proxy/8000/v1/chat/completions",
 
193
  f"{base_url}:8000/v1/chat/completions",
 
 
194
  ]
195
 
196
  headers = {}
 
25
  # Token for the AMD inference server (if required)
26
  AMD_INFERENCE_TOKEN = os.environ.get(
27
  "AMD_INFERENCE_TOKEN",
28
+ "5peRa6unb0DdXvzB3Pbck48IgNTDmxeJSUvE4NdnhvW70FcaX"
29
  )
30
 
31
  # The model name vLLM is serving (used in the chat/completions request).
 
188
  # Candidate endpoints
189
  base_url = AMD_INFERENCE_URL.rstrip("/")
190
  candidates = [
 
191
  f"{base_url}/proxy/8000/v1/chat/completions",
192
+ f"{base_url}/proxy/8001/v1/chat/completions",
193
  f"{base_url}:8000/v1/chat/completions",
194
+ f"{base_url}:8001/v1/chat/completions",
195
+ f"{base_url}/v1/chat/completions",
196
  ]
197
 
198
  headers = {}
app.py CHANGED
@@ -201,20 +201,24 @@ async def api_get_telemetry():
201
  # Candidate endpoints
202
  base_url = AMD_INFERENCE_URL.rstrip("/")
203
  candidates = [
204
- f"{base_url}/v1/models",
205
  f"{base_url}/proxy/8000/v1/models",
 
206
  f"{base_url}:8000/v1/models",
 
 
207
  ]
208
 
209
  headers = {}
210
  if AMD_INFERENCE_TOKEN:
 
211
  headers["Authorization"] = f"token {AMD_INFERENCE_TOKEN}"
212
 
213
  last_err = None
214
  success_url = None
215
  for url in candidates:
216
  try:
217
- async with httpx.AsyncClient(timeout=2.0) as client:
 
218
  test_url = f"{url}?token={AMD_INFERENCE_TOKEN}" if AMD_INFERENCE_TOKEN else url
219
  resp = await client.get(test_url, headers=headers)
220
  if resp.status_code == 200:
 
201
  # Candidate endpoints
202
  base_url = AMD_INFERENCE_URL.rstrip("/")
203
  candidates = [
 
204
  f"{base_url}/proxy/8000/v1/models",
205
+ f"{base_url}/proxy/8001/v1/models",
206
  f"{base_url}:8000/v1/models",
207
+ f"{base_url}:8001/v1/models",
208
+ f"{base_url}/v1/models",
209
  ]
210
 
211
  headers = {}
212
  if AMD_INFERENCE_TOKEN:
213
+ # Use BOTH header formats for compatibility
214
  headers["Authorization"] = f"token {AMD_INFERENCE_TOKEN}"
215
 
216
  last_err = None
217
  success_url = None
218
  for url in candidates:
219
  try:
220
+ # Increase timeout to 5s for remote server wake-up
221
+ async with httpx.AsyncClient(timeout=5.0) as client:
222
  test_url = f"{url}?token={AMD_INFERENCE_TOKEN}" if AMD_INFERENCE_TOKEN else url
223
  resp = await client.get(test_url, headers=headers)
224
  if resp.status_code == 200:
deploy.ps1 CHANGED
@@ -1,16 +1,18 @@
1
  # Deploy ForgeSight to Hugging Face Spaces
2
  # Run this from the project root: c:\Users\user\OneDrive\Desktop\hans\hans
3
 
4
- # 1. Clone the HF Space repo (if not already done)
5
- git clone https://huggingface.co/spaces/rasAli02/ForgeSight hf_space_repo
 
 
6
 
7
- # 2. Copy all deployment files into the cloned repo
8
- Copy-Item hf_space\* hf_space_repo\ -Force
9
 
10
  # 3. Push to HF Spaces
11
  Set-Location hf_space_repo
12
  git add -A
13
- git commit -m "Deploy ForgeSight Gradio backend with AMD MI300X agent pipeline"
14
  git push
15
 
16
  # After push, the space will build and start at:
 
1
  # Deploy ForgeSight to Hugging Face Spaces
2
  # Run this from the project root: c:\Users\user\OneDrive\Desktop\hans\hans
3
 
4
+ # 1. Clone/Update the HF Space repo
5
+ if (!(Test-Path hf_space_repo)) {
6
+ git clone https://huggingface.co/spaces/lablab-ai-amd-developer-hackathon/ForgeSight hf_space_repo
7
+ }
8
 
9
+ # 2. Copy all deployment files recursively into the cloned repo
10
+ Copy-Item -Path "hf_space\*" -Destination "hf_space_repo\" -Recurse -Force
11
 
12
  # 3. Push to HF Spaces
13
  Set-Location hf_space_repo
14
  git add -A
15
+ git commit -m "πŸš€ ForgeSight: Enhanced AMD MI300X connectivity with Smart Discovery"
16
  git push
17
 
18
  # After push, the space will build and start at: