import os
import requests
import json
from dotenv import load_dotenv

# Pull environment variables (notably HF_TOKEN) from a local .env file.
load_dotenv()

# Hugging Face API token read from the environment; None if unset,
# in which case the Authorization header below carries "Bearer None".
HF_TOKEN = os.getenv("HF_TOKEN")

# Hosted Inference API endpoint for Mistral-7B-Instruct-v0.3.
HF_API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"

# Shared request headers: bearer-token auth + JSON body.
HEADERS = {
"Authorization": f"Bearer {HF_TOKEN}",
"Content-Type": "application/json"
}
def mistral_generate(prompt: str,
                     max_new_tokens: int = 128,
                     temperature: float = 0.7) -> str:
    """
    Call the HF Inference-API for Mistral-7B-Instruct-v0.3.

    Waits while the model spins up by sending ``"options":
    {"wait_for_model": true}`` in the JSON payload — the mechanism the
    Inference API actually honors (a ``wait_for_model`` URL query
    parameter is ignored by the service).

    Args:
        prompt: Instruction text sent to the model.
        max_new_tokens: Upper bound on tokens to generate.
        temperature: Sampling temperature.

    Returns:
        The generated text (stripped), or an empty string on any failure
        (HTTP error, timeout, malformed JSON, or an error payload).
    """
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": max_new_tokens,
            "temperature": temperature
        },
        # ⭐ key change: wait_for_model belongs in the request body's
        # "options" object, not in the URL query string.
        "options": {"wait_for_model": True}
    }
    try:
        r = requests.post(
            HF_API_URL,
            headers=HEADERS,
            json=payload,   # use `json=` not `data=`
            timeout=90      # give the model time to load
        )
        r.raise_for_status()
        data = r.json()
        # Standard text-generation models return a list of dicts:
        # [{"generated_text": "..."}]
        if isinstance(data, list) and data:
            return data[0].get("generated_text", "").strip()
        # A dict response typically carries an error/status message
        # (quota, auth, model loading) — surface it instead of dropping it.
        if isinstance(data, dict) and "error" in data:
            print("❌ Mistral API error:", data["error"])
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError also covers r.json() failing on a malformed body
        # (requests' JSONDecodeError subclasses ValueError).
        print("❌ Mistral API error:", str(e))
    return ""