# blade-inspection-demo / gptoss_wrapper.py
"""
GPTOSSWrapper - Lightweight integration wrapper for the OpenAI API or the Hugging Face Inference API.
Usage:
from gptoss_wrapper import GPTOSSWrapper
    w = GPTOSSWrapper(model="gpt-oss-120b")
text = w.generate(prompt)
Behavior:
- Provider selection (priority):
1) If OPENAI_API_KEY is set -> use OpenAI Chat Completions (v1/chat/completions)
2) Else if HUGGINGFACE_API_TOKEN or HF_API_TOKEN is set -> use Hugging Face Inference API
3) Else -> generate() will raise a RuntimeError describing missing credentials.
Note for Spaces:
- Add the secret in your Space settings (Settings → Secrets & variables → Add secret):
- For OpenAI: key name = OPENAI_API_KEY, value = <your_openai_api_key>
- For Hugging Face: key name = HUGGINGFACE_API_TOKEN (or HF_API_TOKEN), value = <your_hf_token>
This file intentionally talks to both providers over plain HTTP via `requests`, to avoid depending on provider-specific SDKs.
"""
import os
import time
import requests
import base64
from typing import Optional
class GPTOSSWrapper:
"""
Lightweight wrapper that can call either OpenAI or Hugging Face inference endpoints.
Constructor:
        GPTOSSWrapper(model="gpt-oss-120b", provider="auto")
- model: model name to request (for OpenAI it must be an available model for your account;
for Hugging Face it should be a model id hosted on HF).
- provider: "auto" (default) | "openai" | "hf"
"""
    def __init__(self, model: str = "gpt-oss-120b", provider: str = "auto"):
# Allow overriding the model via env var MODEL_ID (useful in Spaces)
env_model = os.getenv("MODEL_ID")
if env_model:
self.model = env_model
else:
self.model = model
self.request_timeout = 30
self.openai_key = os.getenv("OPENAI_API_KEY")
# Accept multiple HF token environment variable names for compatibility:
# HUGGINGFACE_API_TOKEN, HF_API_TOKEN, or HF_TOKEN (used by some HF examples)
self.hf_token = (
os.getenv("HUGGINGFACE_API_TOKEN")
or os.getenv("HF_API_TOKEN")
or os.getenv("HF_TOKEN")
)
self.provider = provider.lower() if provider else "auto"
# If we have an HF token and the user didn't explicitly set a MODEL_ID,
# prefer the HF router and use a sensible default router model id.
        # Accept the old misspelled default ("gpt-oss-120") for backwards compatibility.
        if self.hf_token and not env_model and model in ("gpt-oss-120", "gpt-oss-120b"):
# Default router model id; you can override via MODEL_ID env var in the Space
self.model = "openai/gpt-oss-120b:fireworks-ai"
if self.provider == "auto":
if self.openai_key:
self.provider = "openai"
elif self.hf_token:
self.provider = "hf"
else:
self.provider = "none"
def generate(self, prompt: str, max_tokens: int = 512, temperature: float = 0.2) -> str:
"""
Generate a textual response for the given prompt.
Returns:
A string with the generated text.
Raises:
RuntimeError if no credentials are found or the remote call fails.
"""
if self.provider == "openai":
return self._generate_openai(prompt, max_tokens=max_tokens, temperature=temperature)
elif self.provider == "hf":
return self._generate_hf(prompt, max_tokens=max_tokens, temperature=temperature)
else:
raise RuntimeError(
"No API key configured for GPT wrapper. Set OPENAI_API_KEY or HUGGINGFACE_API_TOKEN in the environment."
)
def analyze_image(self, image_path: str, prompt: str, max_tokens: int = 512, temperature: float = 0.2) -> str:
"""
        Analyze an image with a vision-capable model (OpenAI, e.g. gpt-4o, or a Hugging Face vision model such as Qwen2-VL).
Args:
image_path: Path to the image file
prompt: Text prompt for analysis
max_tokens: Maximum tokens in response
temperature: Temperature for generation
Returns:
Analysis text from vision model
Raises:
RuntimeError if no vision model is available or if the call fails
"""
if self.provider == "openai":
return self._analyze_image_openai(image_path, prompt, max_tokens, temperature)
elif self.provider == "hf":
return self._analyze_image_hf(image_path, prompt, max_tokens, temperature)
else:
raise RuntimeError("Image analysis requires either OpenAI API key or Hugging Face token. Set OPENAI_API_KEY or HUGGINGFACE_API_TOKEN.")
def _generate_openai(self, prompt: str, max_tokens: int, temperature: float) -> str:
if not self.openai_key:
raise RuntimeError("OPENAI_API_KEY not set in environment.")
url = "https://api.openai.com/v1/chat/completions"
headers = {
"Authorization": f"Bearer {self.openai_key}",
"Content-Type": "application/json",
}
# Build a simple chat conversation with a single system + user message
payload = {
"model": self.model,
"messages": [
{"role": "system", "content": "You are an expert inspection assistant for wind turbine blade images/videos."},
{"role": "user", "content": prompt},
],
"max_tokens": max_tokens,
"temperature": float(temperature),
"n": 1,
}
try:
r = requests.post(url, headers=headers, json=payload, timeout=self.request_timeout)
r.raise_for_status()
data = r.json()
# OpenAI API returns a list of choices
choices = data.get("choices", [])
if not choices:
raise RuntimeError(f"OpenAI returned empty choices: {data}")
# Extract the assistant message
msg = choices[0].get("message", {}).get("content")
if msg is None:
                # Some deployments return text in 'text' or other fields; fall back to stringifying the response
return str(data)
return msg.strip()
except Exception as e:
# Surface a clear error for the calling code to handle (the app catches exceptions)
raise RuntimeError(f"OpenAI API call failed: {e}")
def _generate_hf(self, prompt: str, max_tokens: int, temperature: float) -> str:
if not self.hf_token:
raise RuntimeError("HUGGINGFACE_API_TOKEN (or HF_API_TOKEN / HF_TOKEN) not set in environment.")
        # Prefer the HF router automatically when an HF token is present,
        # unless explicitly disabled via HF_USE_ROUTER=0/false/no.
        hf_use_router_val = os.getenv("HF_USE_ROUTER", "").lower()
        use_router = hf_use_router_val not in ("0", "false", "no")
        # The router is also required for OpenAI-style model ids
        # (e.g. "openai/gpt-oss-120b:fireworks-ai"), so force it on for those.
        if "openai/" in (self.model or "") or ":" in (self.model or ""):
            use_router = True
try:
if use_router:
# Router (OpenAI-compatible) endpoint: accepts chat/completions style payloads
url = "https://router.huggingface.co/v1/chat/completions"
headers = {"Authorization": f"Bearer {self.hf_token}", "Content-Type": "application/json"}
payload = {
"model": self.model,
"messages": [
{"role": "system", "content": "You are an expert inspection assistant for wind turbine blade images/videos."},
{"role": "user", "content": prompt},
],
"max_tokens": max_tokens,
"temperature": float(temperature),
"n": 1,
}
r = requests.post(url, headers=headers, json=payload, timeout=self.request_timeout)
r.raise_for_status()
data = r.json()
# Try to extract OpenAI-style response
choices = data.get("choices", [])
if choices and isinstance(choices, list):
first = choices[0]
# OpenAI-compatible router usually returns message under 'message'
msg = first.get("message", {}).get("content") if isinstance(first, dict) else None
# Some router variants may return text under 'text' or 'content'
if not msg:
msg = first.get("text") or first.get("content")
if msg:
return msg.strip()
# Fallback stringify
return str(data)
else:
# Standard Hugging Face inference API
url = f"https://api-inference.huggingface.co/models/{self.model}"
headers = {"Authorization": f"Bearer {self.hf_token}"}
payload = {
"inputs": prompt,
"parameters": {
"max_new_tokens": max_tokens,
"temperature": float(temperature),
},
"options": {"wait_for_model": True},
}
r = requests.post(url, headers=headers, json=payload, timeout=self.request_timeout)
r.raise_for_status()
data = r.json()
# Hugging Face inference may return a list of generated outputs or a dict
if isinstance(data, list) and len(data) > 0 and isinstance(data[0], dict) and "generated_text" in data[0]:
return data[0]["generated_text"].strip()
elif isinstance(data, dict) and "generated_text" in data:
return data["generated_text"].strip()
elif isinstance(data, dict) and "error" in data:
raise RuntimeError(f"Hugging Face error: {data['error']}")
else:
# Some text-generation endpoints return a plain string or different struct; try to stringify
return str(data)
except Exception as e:
raise RuntimeError(f"Hugging Face API call failed: {e}")
def _analyze_image_openai(self, image_path: str, prompt: str, max_tokens: int, temperature: float) -> str:
"""
        Analyze an image via OpenAI's chat completions API with a vision-capable model.
"""
if not self.openai_key:
raise RuntimeError("OPENAI_API_KEY not set in environment.")
# Encode image to base64
try:
with open(image_path, "rb") as image_file:
base64_image = base64.b64encode(image_file.read()).decode('utf-8')
except Exception as e:
raise RuntimeError(f"Failed to read image file {image_path}: {e}")
url = "https://api.openai.com/v1/chat/completions"
headers = {
"Authorization": f"Bearer {self.openai_key}",
"Content-Type": "application/json",
}
        # Vision-capable OpenAI model. "gpt-4-vision-preview" has been retired by OpenAI;
        # gpt-4o accepts the same image_url content parts used below.
        vision_model = "gpt-4o"
# Build payload for vision API
payload = {
"model": vision_model,
"messages": [
{
"role": "system",
"content": "You are an expert inspection assistant for wind turbine blade images/videos. Analyze images in detail and provide comprehensive assessments in Spanish."
},
{
"role": "user",
"content": [
{
"type": "text",
"text": prompt
},
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{base64_image}",
"detail": "high"
}
}
]
}
],
"max_tokens": max_tokens,
"temperature": float(temperature),
}
try:
r = requests.post(url, headers=headers, json=payload, timeout=60) # Longer timeout for vision
r.raise_for_status()
data = r.json()
choices = data.get("choices", [])
if not choices:
raise RuntimeError(f"OpenAI Vision returned empty choices: {data}")
msg = choices[0].get("message", {}).get("content")
if msg is None:
return str(data)
return msg.strip()
except Exception as e:
raise RuntimeError(f"OpenAI Vision API call failed: {e}")
def _analyze_image_hf(self, image_path: str, prompt: str, max_tokens: int, temperature: float) -> str:
"""
Analyze an image using Hugging Face vision models (like Qwen2-VL).
"""
if not self.hf_token:
raise RuntimeError("HUGGINGFACE_API_TOKEN not set in environment.")
# Encode image to base64
try:
with open(image_path, "rb") as image_file:
base64_image = base64.b64encode(image_file.read()).decode('utf-8')
except Exception as e:
raise RuntimeError(f"Failed to read image file {image_path}: {e}")
# Use Qwen2-VL model for vision analysis
vision_model = os.getenv("VISION_MODEL_ID", "Qwen/Qwen2-VL-7B-Instruct")
        # Use the router unless explicitly disabled via HF_USE_ROUTER=0/false/no.
        use_router = os.getenv("HF_USE_ROUTER", "").lower() not in ("0", "false", "no")
try:
if use_router:
# Router endpoint for vision models
url = "https://router.huggingface.co/v1/chat/completions"
headers = {"Authorization": f"Bearer {self.hf_token}", "Content-Type": "application/json"}
payload = {
"model": vision_model,
"messages": [
{
"role": "system",
"content": "You are an expert inspection assistant for wind turbine blade images/videos. Analyze images in detail and provide comprehensive assessments in Spanish."
},
{
"role": "user",
"content": [
{
"type": "text",
"text": prompt
},
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{base64_image}"
}
}
]
}
],
"max_tokens": max_tokens,
"temperature": float(temperature),
}
r = requests.post(url, headers=headers, json=payload, timeout=120)
r.raise_for_status()
data = r.json()
choices = data.get("choices", [])
if choices and isinstance(choices, list):
first = choices[0]
msg = first.get("message", {}).get("content") if isinstance(first, dict) else None
if not msg:
msg = first.get("text") or first.get("content")
if msg:
return msg.strip()
return str(data)
else:
# Direct Hugging Face Inference API for vision models
url = f"https://api-inference.huggingface.co/models/{vision_model}"
headers = {"Authorization": f"Bearer {self.hf_token}"}
                # Send both text and image. The exact payload shape the serverless
                # Inference API expects varies by model/pipeline, so this may need
                # adjusting for a given checkpoint.
payload = {
"inputs": {
"text": prompt,
"image": base64_image
},
"parameters": {
"max_new_tokens": max_tokens,
"temperature": float(temperature),
},
"options": {"wait_for_model": True},
}
r = requests.post(url, headers=headers, json=payload, timeout=120)
r.raise_for_status()
data = r.json()
# Handle different response formats
if isinstance(data, list) and len(data) > 0:
if isinstance(data[0], dict):
if "generated_text" in data[0]:
return data[0]["generated_text"].strip()
elif "text" in data[0]:
return data[0]["text"].strip()
elif isinstance(data, dict):
if "generated_text" in data:
return data["generated_text"].strip()
elif "text" in data:
return data["text"].strip()
elif "error" in data:
raise RuntimeError(f"Hugging Face error: {data['error']}")
return str(data)
except Exception as e:
raise RuntimeError(f"Hugging Face Vision API call failed: {e}")
# Backwards-compatible factory for callers that expect a function rather than the class.
def GPTOSSWrapperFactory(model: Optional[str] = None, provider: Optional[str] = None) -> GPTOSSWrapper:
    return GPTOSSWrapper(model=model or "gpt-oss-120b", provider=provider or "auto")
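

if __name__ == "__main__":
    # Minimal smoke test (a sketch, not part of the demo app): requires one of the
    # credentials described in the module docstring to be set in the environment.
    try:
        w = GPTOSSWrapper()
        print(f"provider={w.provider} model={w.model}")
        print(w.generate("In one sentence, list common wind turbine blade defects.", max_tokens=64))
    except RuntimeError as exc:
        # Expected when no API key/token is configured.
        print(f"Smoke test skipped: {exc}")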