Spaces:
Running
Running
Update api.py
Browse files
api.py
CHANGED
|
@@ -33,6 +33,7 @@ app.add_middleware(
|
|
| 33 |
MODELS = {}
|
| 34 |
VOYAGE_API_KEY = os.environ.get('VOYAGE_API_KEY', '')
|
| 35 |
FIREWORKS_API_KEY = os.environ.get('FIREWORKS_API_KEY', '')
|
|
|
|
| 36 |
API_KEY = os.environ.get('API_KEY', '')
|
| 37 |
REQUIRE_API_KEY = os.environ.get('REQUIRE_API_KEY', 'false').lower() == 'true'
|
| 38 |
|
|
@@ -48,6 +49,7 @@ for cache_dir in [os.environ['TRANSFORMERS_CACHE'], os.environ['HF_HOME'], os.en
|
|
| 48 |
security = HTTPBearer(auto_error=False)
|
| 49 |
voyage_client = None
|
| 50 |
fireworks_available = False
|
|
|
|
| 51 |
|
| 52 |
logger.info(f"API Key authentication: {'ENABLED' if REQUIRE_API_KEY else 'DISABLED'}")
|
| 53 |
if API_KEY:
|
|
@@ -88,6 +90,17 @@ if FIREWORKS_API_KEY:
|
|
| 88 |
# Still mark as available if key is set
|
| 89 |
fireworks_available = True if FIREWORKS_API_KEY else False
|
| 90 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
def load_models():
|
| 92 |
"""Load embedding models on startup (gracefully handles failures)"""
|
| 93 |
|
|
@@ -115,14 +128,17 @@ def load_models():
|
|
| 115 |
except Exception as e:
|
| 116 |
logger.warning(f"⚠️ Jina AI v3 not loaded: {e}")
|
| 117 |
|
| 118 |
-
# Qwen3-Embedding-8B via Fireworks AI (API-based, no download needed!)
|
| 119 |
if fireworks_available:
|
| 120 |
MODELS['qwen3'] = 'fireworks' # Mark as available via Fireworks AI
|
| 121 |
logger.info("✓ Qwen3-Embedding-8B available via Fireworks AI API (MTEB #1, no local model needed)")
|
|
|
|
|
|
|
|
|
|
| 122 |
else:
|
| 123 |
logger.warning("⚠️ Qwen3-Embedding-8B not available")
|
| 124 |
-
logger.warning(" To enable: Set FIREWORKS_API_KEY environment variable")
|
| 125 |
-
logger.warning("
|
| 126 |
logger.warning(" This avoids 15GB local download!")
|
| 127 |
|
| 128 |
# Check if at least one model loaded
|
|
@@ -205,6 +221,44 @@ def get_fireworks_embeddings(texts: List[str], task: Optional[str] = None) -> Li
|
|
| 205 |
|
| 206 |
return embeddings
|
| 207 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
@app.on_event("startup")
|
| 209 |
async def startup_event():
|
| 210 |
load_models()
|
|
@@ -267,6 +321,7 @@ class HealthResponse(BaseModel):
|
|
| 267 |
models_loaded: List[str]
|
| 268 |
voyage_available: bool
|
| 269 |
fireworks_available: bool
|
|
|
|
| 270 |
api_key_required: bool
|
| 271 |
|
| 272 |
@app.get("/", response_model=dict)
|
|
@@ -293,6 +348,7 @@ async def health():
|
|
| 293 |
"models_loaded": models_loaded,
|
| 294 |
"voyage_available": voyage_client is not None,
|
| 295 |
"fireworks_available": fireworks_available,
|
|
|
|
| 296 |
"api_key_required": REQUIRE_API_KEY
|
| 297 |
}
|
| 298 |
|
|
@@ -315,7 +371,8 @@ async def create_embeddings_elasticsearch(
|
|
| 315 |
- `jobbertv2`: JobBERT-v2 (768-dim, job-specific)
|
| 316 |
- `jobbertv3`: JobBERT-v3 (768-dim, job-specific, improved performance) - default
|
| 317 |
- `jina`: Jina AI embeddings-v3 (1024-dim, general purpose)
|
| 318 |
-
- `qwen3`: Qwen3-Embedding-8B (4096-dim, MTEB #1, multilingual, 32k context)
|
|
|
|
| 319 |
- `voyage`: Voyage AI (1024-dim, requires API key)
|
| 320 |
|
| 321 |
**Jina AI Tasks (via query parameter):**
|
|
@@ -371,6 +428,36 @@ async def create_embeddings_elasticsearch(
|
|
| 371 |
except Exception as e:
|
| 372 |
raise HTTPException(status_code=500, detail=f"Voyage AI error: {str(e)}")
|
| 373 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 374 |
elif model_name in MODELS:
|
| 375 |
try:
|
| 376 |
selected_model = MODELS[model_name]
|
|
@@ -378,6 +465,9 @@ async def create_embeddings_elasticsearch(
|
|
| 378 |
# Qwen3 via Fireworks AI API (no local model)
|
| 379 |
if model_name == "qwen3" and selected_model == 'fireworks':
|
| 380 |
embeddings_list = get_fireworks_embeddings(texts, task=task)
|
|
|
|
|
|
|
|
|
|
| 381 |
# Jina AI with task type
|
| 382 |
elif model_name == "jina" and task:
|
| 383 |
embeddings = selected_model.encode(
|
|
@@ -486,6 +576,9 @@ async def create_embeddings_batch(
|
|
| 486 |
# Qwen3 via Fireworks AI API (no local model)
|
| 487 |
if model_name == "qwen3" and selected_model == 'fireworks':
|
| 488 |
embeddings_list = get_fireworks_embeddings(request.texts, task=request.task)
|
|
|
|
|
|
|
|
|
|
| 489 |
# Jina AI with task type
|
| 490 |
elif model_name == "jina" and request.task:
|
| 491 |
embeddings = selected_model.encode(
|
|
|
|
| 33 |
MODELS = {}
|
| 34 |
VOYAGE_API_KEY = os.environ.get('VOYAGE_API_KEY', '')
|
| 35 |
FIREWORKS_API_KEY = os.environ.get('FIREWORKS_API_KEY', '')
|
| 36 |
+
OPENROUTER_API_KEY = os.environ.get('OPENROUTER_API_KEY', '')
|
| 37 |
API_KEY = os.environ.get('API_KEY', '')
|
| 38 |
REQUIRE_API_KEY = os.environ.get('REQUIRE_API_KEY', 'false').lower() == 'true'
|
| 39 |
|
|
|
|
| 49 |
security = HTTPBearer(auto_error=False)
|
| 50 |
voyage_client = None
|
| 51 |
fireworks_available = False
|
| 52 |
+
openrouter_available = False
|
| 53 |
|
| 54 |
logger.info(f"API Key authentication: {'ENABLED' if REQUIRE_API_KEY else 'DISABLED'}")
|
| 55 |
if API_KEY:
|
|
|
|
| 90 |
# Still mark as available if key is set
|
| 91 |
fireworks_available = True if FIREWORKS_API_KEY else False
|
| 92 |
|
| 93 |
+
if OPENROUTER_API_KEY:
|
| 94 |
+
try:
|
| 95 |
+
import requests
|
| 96 |
+
openrouter_available = True
|
| 97 |
+
logger.info("✓ OpenRouter API key configured (Qwen3, text-embedding-3-small, and more available)")
|
| 98 |
+
except ImportError:
|
| 99 |
+
logger.warning("⚠️ requests package not installed (needed for OpenRouter)")
|
| 100 |
+
except Exception as e:
|
| 101 |
+
logger.warning(f"⚠️ OpenRouter validation failed: {e}")
|
| 102 |
+
openrouter_available = True if OPENROUTER_API_KEY else False
|
| 103 |
+
|
| 104 |
def load_models():
|
| 105 |
"""Load embedding models on startup (gracefully handles failures)"""
|
| 106 |
|
|
|
|
| 128 |
except Exception as e:
|
| 129 |
logger.warning(f"⚠️ Jina AI v3 not loaded: {e}")
|
| 130 |
|
| 131 |
+
# Qwen3-Embedding-8B via Fireworks AI or OpenRouter (API-based, no download needed!)
|
| 132 |
if fireworks_available:
|
| 133 |
MODELS['qwen3'] = 'fireworks' # Mark as available via Fireworks AI
|
| 134 |
logger.info("✓ Qwen3-Embedding-8B available via Fireworks AI API (MTEB #1, no local model needed)")
|
| 135 |
+
elif openrouter_available:
|
| 136 |
+
MODELS['qwen3'] = 'openrouter' # Mark as available via OpenRouter
|
| 137 |
+
logger.info("✓ Qwen3-Embedding-8B available via OpenRouter API (MTEB #1, no local model needed)")
|
| 138 |
else:
|
| 139 |
logger.warning("⚠️ Qwen3-Embedding-8B not available")
|
| 140 |
+
logger.warning(" To enable: Set FIREWORKS_API_KEY or OPENROUTER_API_KEY environment variable")
|
| 141 |
+
logger.warning(" Fireworks: https://fireworks.ai | OpenRouter: https://openrouter.ai")
|
| 142 |
logger.warning(" This avoids 15GB local download!")
|
| 143 |
|
| 144 |
# Check if at least one model loaded
|
|
|
|
| 221 |
|
| 222 |
return embeddings
|
| 223 |
|
| 224 |
+
def get_openrouter_embeddings(texts: List[str], model: str = "qwen/qwen3-embedding-8b") -> List[List[float]]:
    """
    Get embeddings from the OpenRouter embeddings API.

    Args:
        texts: List of texts to embed. An empty list returns ``[]`` without
            contacting the API.
        model: Model to use (default: qwen/qwen3-embedding-8b)
            Also supports: openai/text-embedding-3-small, openai/text-embedding-3-large

    Returns:
        List of embedding vectors taken from the response's ``data`` items,
        ordered by each item's ``index`` field when the API supplies one
        (response order otherwise).

    Raises:
        RuntimeError: If OPENROUTER_API_KEY is not configured, the API answers
            with a non-200 status, or the response body does not have the
            expected ``{"data": [{"embedding": [...]}, ...]}`` shape.
    """
    # Nothing to embed: skip the network round trip, which could only fail.
    if not texts:
        return []

    # Function-local import, as before: the module treats requests as an
    # optional dependency and must stay importable without it.
    import requests

    if not OPENROUTER_API_KEY:
        raise RuntimeError("OPENROUTER_API_KEY not configured")

    response = requests.post(
        "https://openrouter.ai/api/v1/embeddings",
        headers={
            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
            "Content-Type": "application/json",
        },
        json={
            "model": model,
            "input": texts,
        },
        timeout=30,
    )

    if response.status_code != 200:
        raise RuntimeError(f"OpenRouter API error: {response.status_code} - {response.text}")

    try:
        items = response.json()["data"]
        # sorted() is stable, so items without an "index" keep response order.
        ordered = sorted(items, key=lambda item: item.get("index", 0))
        return [item["embedding"] for item in ordered]
    except (ValueError, KeyError, TypeError, AttributeError) as err:
        # A 200 with a non-JSON or differently shaped body would otherwise
        # surface as a bare KeyError/ValueError with no useful context.
        raise RuntimeError(
            f"OpenRouter API returned an unexpected response: {response.text}"
        ) from err
|
| 261 |
+
|
| 262 |
@app.on_event("startup")
|
| 263 |
async def startup_event():
|
| 264 |
load_models()
|
|
|
|
| 321 |
models_loaded: List[str]
|
| 322 |
voyage_available: bool
|
| 323 |
fireworks_available: bool
|
| 324 |
+
openrouter_available: bool
|
| 325 |
api_key_required: bool
|
| 326 |
|
| 327 |
@app.get("/", response_model=dict)
|
|
|
|
| 348 |
"models_loaded": models_loaded,
|
| 349 |
"voyage_available": voyage_client is not None,
|
| 350 |
"fireworks_available": fireworks_available,
|
| 351 |
+
"openrouter_available": openrouter_available,
|
| 352 |
"api_key_required": REQUIRE_API_KEY
|
| 353 |
}
|
| 354 |
|
|
|
|
| 371 |
- `jobbertv2`: JobBERT-v2 (768-dim, job-specific)
|
| 372 |
- `jobbertv3`: JobBERT-v3 (768-dim, job-specific, improved performance) - default
|
| 373 |
- `jina`: Jina AI embeddings-v3 (1024-dim, general purpose)
|
| 374 |
+
- `qwen3`: Qwen3-Embedding-8B (4096-dim, MTEB #1, multilingual, 32k context, via Fireworks or OpenRouter)
|
| 375 |
+
- `openrouter`: OpenRouter embeddings (supports multiple models, requires API key)
|
| 376 |
- `voyage`: Voyage AI (1024-dim, requires API key)
|
| 377 |
|
| 378 |
**Jina AI Tasks (via query parameter):**
|
|
|
|
| 428 |
except Exception as e:
|
| 429 |
raise HTTPException(status_code=500, detail=f"Voyage AI error: {str(e)}")
|
| 430 |
|
| 431 |
+
elif model_name == "openrouter":
|
| 432 |
+
if not openrouter_available:
|
| 433 |
+
raise HTTPException(
|
| 434 |
+
status_code=503,
|
| 435 |
+
detail="OpenRouter not available. Set OPENROUTER_API_KEY environment variable."
|
| 436 |
+
)
|
| 437 |
+
|
| 438 |
+
try:
|
| 439 |
+
# Use OpenRouter with specified model or default
|
| 440 |
+
openrouter_model = task or "qwen/qwen3-embedding-8b" # Use task param as model selector
|
| 441 |
+
embeddings_list = get_openrouter_embeddings(texts, model=openrouter_model)
|
| 442 |
+
|
| 443 |
+
# Calculate token usage
|
| 444 |
+
token_count = estimate_token_count(texts)
|
| 445 |
+
|
| 446 |
+
# Create OpenAI-compatible response
|
| 447 |
+
data = [
|
| 448 |
+
EmbeddingObject(index=i, embedding=emb)
|
| 449 |
+
for i, emb in enumerate(embeddings_list)
|
| 450 |
+
]
|
| 451 |
+
|
| 452 |
+
return OpenAIEmbeddingResponse(
|
| 453 |
+
model=f"openrouter/{openrouter_model}",
|
| 454 |
+
object="list",
|
| 455 |
+
usage=UsageInfo(total_tokens=token_count, prompt_tokens=token_count),
|
| 456 |
+
data=data
|
| 457 |
+
)
|
| 458 |
+
except Exception as e:
|
| 459 |
+
raise HTTPException(status_code=500, detail=f"OpenRouter error: {str(e)}")
|
| 460 |
+
|
| 461 |
elif model_name in MODELS:
|
| 462 |
try:
|
| 463 |
selected_model = MODELS[model_name]
|
|
|
|
| 465 |
# Qwen3 via Fireworks AI API (no local model)
|
| 466 |
if model_name == "qwen3" and selected_model == 'fireworks':
|
| 467 |
embeddings_list = get_fireworks_embeddings(texts, task=task)
|
| 468 |
+
# Qwen3 via OpenRouter API
|
| 469 |
+
elif model_name == "qwen3" and selected_model == 'openrouter':
|
| 470 |
+
embeddings_list = get_openrouter_embeddings(texts, model="qwen/qwen3-embedding-8b")
|
| 471 |
# Jina AI with task type
|
| 472 |
elif model_name == "jina" and task:
|
| 473 |
embeddings = selected_model.encode(
|
|
|
|
| 576 |
# Qwen3 via Fireworks AI API (no local model)
|
| 577 |
if model_name == "qwen3" and selected_model == 'fireworks':
|
| 578 |
embeddings_list = get_fireworks_embeddings(request.texts, task=request.task)
|
| 579 |
+
# Qwen3 via OpenRouter API
|
| 580 |
+
elif model_name == "qwen3" and selected_model == 'openrouter':
|
| 581 |
+
embeddings_list = get_openrouter_embeddings(request.texts, model="qwen/qwen3-embedding-8b")
|
| 582 |
# Jina AI with task type
|
| 583 |
elif model_name == "jina" and request.task:
|
| 584 |
embeddings = selected_model.encode(
|