# FastAPI/models_initialization/mistral_registry.py
import os

import requests
from dotenv import load_dotenv

load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")
HF_API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"

HEADERS = {
    "Authorization": f"Bearer {HF_TOKEN}",
    "Content-Type": "application/json",
}

def mistral_generate(prompt: str,
                     max_new_tokens: int = 128,
                     temperature: float = 0.7) -> str:
    """
    Call the HF Inference API for mistralai/Mistral-7B-Instruct-v0.3.

    - Waits while the model spins up (`options.wait_for_model = True`).
    - Returns the generated text, or an empty string on failure.
    """
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": max_new_tokens,
            "temperature": temperature,
        },
        # Ask the API to block until the model is loaded instead of
        # returning a 503 during a cold start. This is the documented
        # `options` field of the Inference API payload.
        "options": {"wait_for_model": True},
    }
    try:
        r = requests.post(
            HF_API_URL,
            headers=HEADERS,
            json=payload,   # `json=` serialises the payload; `data=` would not
            timeout=90,     # give the model time to load on a cold start
        )
        r.raise_for_status()
        data = r.json()
        # For standard text-generation models the API returns a list
        # of generations, each with a "generated_text" field.
        if isinstance(data, list) and data:
            return data[0].get("generated_text", "").strip()
        # Anything else (e.g. an error dict) falls through to the empty return.
    except requests.exceptions.RequestException as e:
        # The response body usually explains quota or auth errors;
        # `e.response` may be None if the request never got a response.
        body = getattr(e.response, "text", "")
        print("❌ Mistral API error:", e, body)
    return ""