derek-thomas HF staff committed on
Commit
7dfa79f
1 Parent(s): 37df3b9

Using gr.Warning, and adding jais generate time.

Browse files
Files changed (1) hide show
  1. backend/query_llm.py +19 -26
backend/query_llm.py CHANGED
@@ -1,10 +1,16 @@
1
  import datetime
 
2
  import os
3
  from os import getenv
 
4
 
5
  import gradio as gr
6
  import requests
7
 
 
 
 
 
8
  API_URL = getenv('API_URL')
9
  BEARER = getenv('BEARER')
10
 
@@ -22,56 +28,43 @@ def call_jais(payload):
22
  except requests.exceptions.HTTPError as http_err:
23
  # Check if the error is a 5XX server error
24
  if 500 <= http_err.response.status_code < 600:
25
- raise gr.Error("The endpoint is loading, it takes about 4 min from the first call.")
26
  else:
27
- raise gr.Error(f"An error occurred while processing the request. {http_err}")
28
  except Exception as err:
29
- raise gr.Error(f"Check Inference Endpoint Status. An error occurred while processing the request. {err}")
30
 
31
 
32
  def generate(prompt: str):
 
 
33
  payload = {'inputs': '', 'prompt': prompt}
34
  response = call_jais(payload)
35
- return response
36
 
 
 
 
37
 
38
- # Global variable to store the previous status and the time when it changed
39
- previous_status = None
40
- status_change_time = None
41
 
42
 
43
  def check_endpoint_status():
44
- global previous_status, status_change_time
45
-
46
  api_url = os.getenv("ENDPOINT_URL")
47
  headers = {
48
  'accept': 'application/json',
49
  'Authorization': f'Bearer {os.getenv("BEARER")}'
50
- }
51
 
52
  try:
53
  response = requests.get(api_url, headers=headers)
54
- response.raise_for_status() # will throw an exception for non-200 status
55
  data = response.json()
56
 
57
  # Extracting the status information
58
  status = data.get('status', {}).get('state', 'No status found')
59
  message = data.get('status', {}).get('message', 'No message found')
60
 
61
- # Check if the status has changed
62
- if status != previous_status:
63
- previous_status = status
64
- status_change_time = datetime.datetime.now()
65
-
66
- # If the previous status was 'scaled to zero' and the current one isn't,
67
- # start the countdown
68
- countdown_message = ""
69
- if status_change_time and previous_status == "scaled to zero" and status != "scaled to zero":
70
- elapsed_time = datetime.datetime.now() - status_change_time
71
- if elapsed_time < datetime.timedelta(minutes=4):
72
- remaining_time = datetime.timedelta(minutes=4) - elapsed_time
73
- countdown_message = f"Countdown: {remaining_time} remaining until fully operational."
74
-
75
- return f"Status: {status}\nMessage: {message}\n{countdown_message}"
76
  except requests.exceptions.RequestException as e:
77
  return f"Failed to get status: {str(e)}"
 
1
  import datetime
2
+ import logging
3
  import os
4
  from os import getenv
5
+ import time
6
 
7
  import gradio as gr
8
  import requests
9
 
10
+ # Setting up the logging
11
+ logging.basicConfig(level=logging.INFO)
12
+ logger = logging.getLogger(__name__)
13
+
14
  API_URL = getenv('API_URL')
15
  BEARER = getenv('BEARER')
16
 
 
28
  except requests.exceptions.HTTPError as http_err:
29
  # Check if the error is a 5XX server error
30
  if 500 <= http_err.response.status_code < 600:
31
+ raise gr.Warning("The endpoint is loading, it takes about 4 min from the first call.")
32
  else:
33
+ raise gr.Warning(f"An error occurred while processing the request. {http_err}")
34
  except Exception as err:
35
+ raise gr.Warning(f"Check Inference Endpoint Status. An error occurred while processing the request. {err}")
36
 
37
 
38
  def generate(prompt: str):
39
+ start_time = time.perf_counter()
40
+
41
  payload = {'inputs': '', 'prompt': prompt}
42
  response = call_jais(payload)
 
43
 
44
+ end_time = time.perf_counter()
45
+ elapsed_time = end_time - start_time
46
+ logger.warning(f"Function took {elapsed_time:.1f} seconds to execute")
47
 
48
+ return response
 
 
49
 
50
 
51
  def check_endpoint_status():
52
+ # Replace with the actual API URL and headers
 
53
  api_url = os.getenv("ENDPOINT_URL")
54
  headers = {
55
  'accept': 'application/json',
56
  'Authorization': f'Bearer {os.getenv("BEARER")}'
57
+ }
58
 
59
  try:
60
  response = requests.get(api_url, headers=headers)
61
+ response.raise_for_status()
62
  data = response.json()
63
 
64
  # Extracting the status information
65
  status = data.get('status', {}).get('state', 'No status found')
66
  message = data.get('status', {}).get('message', 'No message found')
67
 
68
+ return f"Status: {status}\nMessage: {message}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  except requests.exceptions.RequestException as e:
70
  return f"Failed to get status: {str(e)}"