Yong Liu committed on
Commit
093ad9c
·
1 Parent(s): dc63702

update handler

Browse files
Files changed (3) hide show
  1. example_client.py +116 -0
  2. handler.py +40 -2
  3. test_hf_endpoint.py +63 -0
example_client.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+ import os
4
+ import argparse
5
+
6
def query_endpoint(endpoint_url, api_token=None, prompt="Hello, how are you?",
                   system_message="You are a helpful assistant.",
                   max_tokens=256, temperature=0.7,
                   format_type="openai"):
    """
    Query the Phi-4 Mini model at the specified HuggingFace Inference Endpoint.

    Args:
        endpoint_url: The URL of your HuggingFace Inference Endpoint
        api_token: Your HuggingFace API token (if needed)
        prompt: The user message to send to the model
        system_message: The system message to include
        max_tokens: Maximum number of tokens to generate
        temperature: Temperature for generation (0.0 to 1.0)
        format_type: Type of request format to use:
            "openai" - Standard OpenAI format
            "hf_wrapped" - HuggingFace format with OpenAI format wrapped in "inputs"
            "simple" - Simple text input in "inputs" field

    Returns:
        The parsed JSON response from the model, or None if the request failed.

    Raises:
        ValueError: If format_type is not one of the supported values.
    """
    # Prepare headers
    headers = {"Content-Type": "application/json"}
    if api_token:
        headers["Authorization"] = f"Bearer {api_token}"

    # Prepare the request payload based on format_type
    chat_messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": prompt},
    ]
    if format_type == "openai":
        # Standard OpenAI chat-completions shape
        payload = {
            "messages": chat_messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
        }
    elif format_type == "hf_wrapped":
        # Same OpenAI payload, nested under HuggingFace's "inputs" key
        payload = {
            "inputs": {
                "messages": chat_messages,
                "max_tokens": max_tokens,
                "temperature": temperature,
            }
        }
    elif format_type == "simple":
        # Bare prompt string; note system_message is not sent in this format
        payload = {"inputs": prompt}
    else:
        raise ValueError(f"Invalid format type: {format_type}")

    # Make the request
    try:
        print(f"Request payload: {json.dumps(payload, indent=2)}")
        # json= serializes the payload and is the idiomatic requests form
        response = requests.post(endpoint_url, headers=headers, json=payload)
        response.raise_for_status()  # Raise an exception for HTTP errors

        # Parse and return the response
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error making request: {e}")
        # BUGFIX: compare against None explicitly. A requests.Response is
        # *falsy* for 4xx/5xx status codes, so the original truthiness test
        # (`and e.response`) skipped printing the error body in exactly the
        # HTTP-error cases this branch exists for.
        if getattr(e, "response", None) is not None:
            print(f"Response content: {e.response.text}")
        return None
80
+
81
if __name__ == "__main__":
    # CLI entry point: parse the endpoint settings, send one query, and
    # pretty-print whatever comes back.
    arg_parser = argparse.ArgumentParser(description="Query a Phi-4 Mini HuggingFace Inference Endpoint")
    arg_parser.add_argument("--url", type=str, required=True, help="The endpoint URL")
    arg_parser.add_argument("--token", type=str, default=os.environ.get("HF_API_TOKEN"), help="HuggingFace API token")
    arg_parser.add_argument("--prompt", type=str, default="Explain quantum computing in simple terms.", help="User prompt")
    arg_parser.add_argument("--system", type=str, default="You are a helpful assistant.", help="System message")
    arg_parser.add_argument("--max_tokens", type=int, default=256, help="Maximum tokens to generate")
    arg_parser.add_argument("--temperature", type=float, default=0.7, help="Temperature (0.0 to 1.0)")
    arg_parser.add_argument("--format", type=str, default="openai",
                            choices=["openai", "hf_wrapped", "simple"],
                            help="Format to use for the request")

    cli_args = arg_parser.parse_args()

    print(f"Querying endpoint: {cli_args.url}")
    print(f"Prompt: {cli_args.prompt}")
    print(f"Format: {cli_args.format}")

    # Keyword arguments make the mapping from CLI flags explicit.
    result = query_endpoint(
        cli_args.url,
        api_token=cli_args.token,
        prompt=cli_args.prompt,
        system_message=cli_args.system,
        max_tokens=cli_args.max_tokens,
        temperature=cli_args.temperature,
        format_type=cli_args.format,
    )

    if not result:
        print("Failed to get a valid response")
    else:
        print("\nResponse:")
        choices = result.get("choices") if isinstance(result, dict) else None
        if choices:
            # OpenAI-style response: show just the assistant message text.
            print(choices[0]["message"]["content"])
        else:
            # Anything else: dump the raw JSON for inspection.
            print(json.dumps(result, indent=2))
handler.py CHANGED
@@ -34,10 +34,44 @@ class EndpointHandler:
34
  print("Model loaded successfully")
35
 
36
  def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
37
- """Handle inference request in OpenAI-like format"""
38
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  # Parse input data
40
- inputs = self._parse_input(data)
41
 
42
  # Generate response
43
  outputs = self._generate(inputs)
@@ -45,6 +79,9 @@ class EndpointHandler:
45
  # Format response in OpenAI-like format
46
  return self._format_response(outputs, inputs)
47
  except Exception as e:
 
 
 
48
  return {
49
  "error": {
50
  "message": str(e),
@@ -58,6 +95,7 @@ class EndpointHandler:
58
  # Extract messages
59
  messages = data.get("messages", [])
60
  if not messages:
 
61
  raise ValueError("No messages provided")
62
 
63
  # Convert messages to prompt
 
34
  print("Model loaded successfully")
35
 
36
  def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
37
+ """Handle inference request in OpenAI-like format or HuggingFace Inference API format"""
38
  try:
39
+ # Debugging: Print the received data
40
+ print(f"Received data: {json.dumps(data, indent=2)}")
41
+
42
+ # Handle HuggingFace Inference API format
43
+ if "inputs" in data:
44
+ # Extract data from inputs key
45
+ if isinstance(data["inputs"], dict):
46
+ # If inputs contains a dictionary, extract it
47
+ input_data = data["inputs"]
48
+ elif isinstance(data["inputs"], str):
49
+ # If inputs is a string, create a simple message
50
+ input_data = {
51
+ "messages": [
52
+ {"role": "user", "content": data["inputs"]}
53
+ ]
54
+ }
55
+ else:
56
+ print(f"Unexpected inputs format: {type(data['inputs'])}")
57
+ # Try to convert to string if possible
58
+ try:
59
+ input_data = {
60
+ "messages": [
61
+ {"role": "user", "content": str(data["inputs"])}
62
+ ]
63
+ }
64
+ except:
65
+ raise ValueError(f"Unsupported inputs format: {type(data['inputs'])}")
66
+ else:
67
+ # Assume direct OpenAI format
68
+ input_data = data
69
+
70
+ # Debugging: Print the parsed input data
71
+ print(f"Parsed input data: {json.dumps(input_data, indent=2)}")
72
+
73
  # Parse input data
74
+ inputs = self._parse_input(input_data)
75
 
76
  # Generate response
77
  outputs = self._generate(inputs)
 
79
  # Format response in OpenAI-like format
80
  return self._format_response(outputs, inputs)
81
  except Exception as e:
82
+ print(f"Error during processing: {str(e)}")
83
+ import traceback
84
+ traceback.print_exc()
85
  return {
86
  "error": {
87
  "message": str(e),
 
95
  # Extract messages
96
  messages = data.get("messages", [])
97
  if not messages:
98
+ print(f"No messages found in data: {json.dumps(data, indent=2)}")
99
  raise ValueError("No messages provided")
100
 
101
  # Convert messages to prompt
test_hf_endpoint.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+ import argparse
4
+
5
def _post_and_report(endpoint_url, headers, payload, label):
    """POST one payload to the endpoint and print the status code and JSON body.

    Errors are printed rather than raised so one failing format does not
    abort the remaining format tests.
    """
    print(f"Testing {label}...")
    print(f"Payload: {json.dumps(payload, indent=2)}")
    try:
        response = requests.post(endpoint_url, headers=headers, json=payload)
        print(f"Status Code: {response.status_code}")
        print(f"Response: {json.dumps(response.json(), indent=2)}")
    except Exception as e:
        print(f"Error: {str(e)}")


def test_hf_endpoint(endpoint_url, api_token=None):
    """Test the HuggingFace Inference Endpoint with different formats.

    Args:
        endpoint_url: URL of the inference endpoint to exercise.
        api_token: Optional HuggingFace API token for the Authorization header.
    """
    # Format 1: Wrapped OpenAI format
    payload1 = {
        "inputs": {
            "messages": [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "Explain quantum computing in simple terms."}
            ],
            "max_tokens": 256,
            "temperature": 0.7
        }
    }

    # Format 2: Simple string
    payload2 = {
        "inputs": "Tell me about AI."
    }

    # Prepare headers
    headers = {
        "Content-Type": "application/json",
    }
    if api_token:
        headers["Authorization"] = f"Bearer {api_token}"

    # The two tests shared an identical POST/print sequence; the helper
    # removes that duplication.
    _post_and_report(endpoint_url, headers, payload1, "Format 1: Wrapped OpenAI format")
    print("\n" + "-"*50 + "\n")
    _post_and_report(endpoint_url, headers, payload2, "Format 2: Simple string")
56
+
57
if __name__ == "__main__":
    # Minimal CLI wrapper: endpoint URL is mandatory, token is optional.
    cli = argparse.ArgumentParser(description="Test HuggingFace Inference Endpoint")
    cli.add_argument("--url", type=str, required=True, help="Endpoint URL")
    cli.add_argument("--token", type=str, help="API token (if needed)")

    parsed = cli.parse_args()
    test_hf_endpoint(parsed.url, parsed.token)