File size: 1,587 Bytes
f00f379
 
17fbf3d
f00f379
 
 
 
17fbf3d
f00f379
 
 
 
17fbf3d
f00f379
 
17fbf3d
 
 
 
 
 
 
 
f00f379
 
 
 
17fbf3d
f00f379
 
 
 
17fbf3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f00f379
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import os
import requests
import json
from dotenv import load_dotenv

# Pull variables from a local .env file into the process environment so
# HF_TOKEN can be configured without exporting it in the shell.
load_dotenv()

# Hugging Face access token; None if not set in the environment/.env.
# NOTE(review): if HF_TOKEN is unset, HEADERS below renders as
# "Bearer None" and every request will fail with 401 — confirm a .env
# (or exported HF_TOKEN) is always present at deploy time.
HF_TOKEN   = os.getenv("HF_TOKEN")
# Hosted Inference API endpoint for Mistral-7B-Instruct-v0.3.
HF_API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"

# Static headers reused for every request to the API.
HEADERS = {
    "Authorization": f"Bearer {HF_TOKEN}",
    "Content-Type":  "application/json"
}

def mistral_generate(prompt: str,
                     max_new_tokens: int = 128,
                     temperature: float = 0.7) -> str:
    """
    Generate text from Mistral-7B-Instruct-v0.3 via the HF Inference API.

    Waits while the model container spins up (``wait_for_model``) instead
    of failing fast with a 503.

    Parameters
    ----------
    prompt : str
        Instruction/prompt sent to the model.
    max_new_tokens : int, default 128
        Upper bound on the number of tokens to generate.
    temperature : float, default 0.7
        Sampling temperature.

    Returns
    -------
    str
        The generated text stripped of surrounding whitespace, or an
        empty string on any failure (network error, bad HTTP status,
        non-JSON body, or an unexpected response shape).
    """
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": max_new_tokens,
            "temperature":    temperature,
        },
        # The Inference API documents `wait_for_model` as a JSON-body
        # option, NOT a URL query parameter — as a query param it is
        # ignored and a cold model returns 503.
        "options": {"wait_for_model": True},
    }

    try:
        r = requests.post(
            HF_API_URL,
            headers=HEADERS,
            json=payload,   # `json=` serializes the body and sets Content-Type
            timeout=90,     # generous: cold model loads can take a while
        )
        r.raise_for_status()
        data = r.json()

        # Standard text-generation models return a list like
        # [{"generated_text": "..."}].
        if isinstance(data, list) and data:
            return data[0].get("generated_text", "").strip()

        # Some API errors come back as a 200 with a dict body; surface
        # them instead of dropping them silently.
        if isinstance(data, dict) and "error" in data:
            print("❌ Mistral API error:", data["error"])

    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers r.json() on a non-JSON body (the requests
        # JSONDecodeError subclasses it); previously this escaped the
        # handler and crashed the caller.
        print("❌ Mistral API error:", str(e))

    return ""