Spaces:
Running
Running
from helpers import get_credentials | |
import requests | |
def hf_inference(prompt, model_id, temperature, max_new_tokens): | |
hf_token, _ = get_credentials.get_hf_credentials() | |
API_URL = "https://router.huggingface.co/together/v1/chat/completions" | |
headers = { | |
"Authorization": f"Bearer {hf_token}", | |
} | |
response = requests.post( | |
API_URL, | |
headers=headers, | |
json={ | |
"messages": [ | |
{ | |
"role": "user", | |
"content": [ | |
{ | |
"type": "text", | |
"text": prompt | |
}, | |
] | |
} | |
], | |
"model": model_id, | |
'temperature': temperature, | |
'max_new_tokens': max_new_tokens, | |
} | |
) | |
return response.json()["choices"][0]["message"] | |
def replicate_inference(prompt, model_id, temperature, max_new_tokens): | |
repl_token = get_credentials.get_replicate_credentials() | |
API_URL = f"https://api.replicate.com/v1/models/{model_id}/predictions" | |
headers = { | |
"Authorization": f"Bearer {repl_token}", | |
"Content-Type": "application/json", | |
"Prefer": "wait" | |
} | |
response = requests.post( | |
API_URL, | |
headers=headers, | |
json={ | |
"input": { | |
"prompt": prompt, | |
"temperature": temperature, | |
"max_tokens": max_new_tokens, | |
} | |
} | |
) | |
return { | |
"content": "".join(response.json()['output']) | |
} | |
INFERENCE_HANDLER = { | |
'huggingface': hf_inference, | |
'replicate': replicate_inference | |
} |