import os
from fastapi import FastAPI, HTTPException, Query, File, UploadFile, Form
from fastapi.responses import StreamingResponse, JSONResponse
from openai import AsyncOpenAI
import base64

app = FastAPI()
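
# The app authenticates against the GitHub Models inference endpoint
# (https://models.github.ai/inference) with a personal access token read
# from the GITHUB_TOKEN environment variable; see generate_ai_response
# and process_image_with_vision below.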
# Models available through the GitHub Models inference endpoint
AVAILABLE_MODELS = {
    "openai/gpt-4.1": "OpenAI GPT-4.1",
    "openai/gpt-4.1-mini": "OpenAI GPT-4.1-mini",
    "openai/gpt-4.1-nano": "OpenAI GPT-4.1-nano",
    "openai/gpt-4o": "OpenAI GPT-4o",
    "openai/gpt-4o-mini": "OpenAI GPT-4o mini",
    "openai/o4-mini": "OpenAI o4-mini",
    "microsoft/MAI-DS-R1": "MAI-DS-R1",
    "microsoft/Phi-3.5-MoE-instruct": "Phi-3.5-MoE instruct (128k)",
    "microsoft/Phi-3.5-mini-instruct": "Phi-3.5-mini instruct (128k)",
    "microsoft/Phi-3.5-vision-instruct": "Phi-3.5-vision instruct (128k)",
    "microsoft/Phi-3-medium-128k-instruct": "Phi-3-medium instruct (128k)",
    "microsoft/Phi-3-medium-4k-instruct": "Phi-3-medium instruct (4k)",
    "microsoft/Phi-3-mini-128k-instruct": "Phi-3-mini instruct (128k)",
    "microsoft/Phi-3-small-128k-instruct": "Phi-3-small instruct (128k)",
    "microsoft/Phi-3-small-8k-instruct": "Phi-3-small instruct (8k)",
    "microsoft/Phi-4": "Phi-4",
    "microsoft/Phi-4-mini-instruct": "Phi-4-mini-instruct",
    "microsoft/Phi-4-multimodal-instruct": "Phi-4-multimodal-instruct",
    "ai21-labs/AI21-Jamba-1.5-Large": "AI21 Jamba 1.5 Large",
    "ai21-labs/AI21-Jamba-1.5-Mini": "AI21 Jamba 1.5 Mini",
    "mistral-ai/Codestral-2501": "Codestral 25.01",
    "cohere/Cohere-command-r": "Cohere Command R",
    "cohere/Cohere-command-r-08-2024": "Cohere Command R 08-2024",
    "cohere/Cohere-command-r-plus": "Cohere Command R+",
    "cohere/Cohere-command-r-plus-08-2024": "Cohere Command R+ 08-2024",
    "deepseek/DeepSeek-R1": "DeepSeek-R1",
    "deepseek/DeepSeek-V3-0324": "DeepSeek-V3-0324",
    "meta/Llama-3.2-11B-Vision-Instruct": "Llama-3.2-11B-Vision-Instruct",
    "meta/Llama-3.2-90B-Vision-Instruct": "Llama-3.2-90B-Vision-Instruct",
    "meta/Llama-3.3-70B-Instruct": "Llama-3.3-70B-Instruct",
    "meta/Llama-4-Maverick-17B-128E-Instruct-FP8": "Llama 4 Maverick 17B 128E Instruct FP8",
    "meta/Llama-4-Scout-17B-16E-Instruct": "Llama 4 Scout 17B 16E Instruct",
    "meta/Meta-Llama-3.1-405B-Instruct": "Meta-Llama-3.1-405B-Instruct",
    "meta/Meta-Llama-3.1-70B-Instruct": "Meta-Llama-3.1-70B-Instruct",
    "meta/Meta-Llama-3.1-8B-Instruct": "Meta-Llama-3.1-8B-Instruct",
    "meta/Meta-Llama-3-70B-Instruct": "Meta-Llama-3-70B-Instruct",
    "meta/Meta-Llama-3-8B-Instruct": "Meta-Llama-3-8B-Instruct",
    "mistral-ai/Ministral-3B": "Ministral 3B",
    "mistral-ai/Mistral-Large-2411": "Mistral Large 24.11",
    "mistral-ai/Mistral-Nemo": "Mistral Nemo",
    "mistral-ai/Mistral-large-2407": "Mistral Large (2407)",
    "mistral-ai/Mistral-small": "Mistral Small",
    "cohere/cohere-command-a": "Cohere Command A",
    "core42/jais-30b-chat": "JAIS 30b Chat",
    "mistral-ai/mistral-small-2503": "Mistral Small 3.1"
}

# Vision-capable models (subset of AVAILABLE_MODELS)
VISION_MODELS = [
    "openai/gpt-4o",
    "openai/gpt-4o-mini",
    "microsoft/Phi-3.5-vision-instruct",
    "meta/Llama-3.2-11B-Vision-Instruct",
    "meta/Llama-3.2-90B-Vision-Instruct",
    "microsoft/Phi-4-multimodal-instruct"
]

async def generate_ai_response(prompt: str, model: str):
    """Stream chat-completion tokens for the given prompt and model."""
    token = os.getenv("GITHUB_TOKEN")
    if not token:
        raise HTTPException(status_code=500, detail="GitHub token not configured")
    endpoint = "https://models.github.ai/inference"
    if model not in AVAILABLE_MODELS:
        raise HTTPException(status_code=400, detail=f"Model not available. Choose from: {', '.join(AVAILABLE_MODELS.keys())}")
    client = AsyncOpenAI(base_url=endpoint, api_key=token)
    try:
        stream = await client.chat.completions.create(
            messages=[
                {"role": "user", "content": prompt}
            ],
            model=model,
            temperature=1.0,
            top_p=1.0,
            stream=True
        )
        async for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content:
                yield chunk.choices[0].delta.content
    except Exception as err:
        # Once streaming has started, the HTTP status line has already been
        # sent, so raising HTTPException here cannot reach the client as a
        # clean error response; report the failure in-band instead.
        yield f"Error: {str(err)}"

async def process_image_with_vision(image: bytes, question: str, model: str, mime_type: str = "image/jpeg"):
    """Answer a question about an image using a vision-capable model."""
    token = os.getenv("GITHUB_TOKEN")
    if not token:
        raise HTTPException(status_code=500, detail="GitHub token not configured")
    endpoint = "https://models.github.ai/inference"
    if model not in VISION_MODELS:
        raise HTTPException(status_code=400, detail=f"Model not vision-capable. Choose from: {', '.join(VISION_MODELS)}")
    client = AsyncOpenAI(base_url=endpoint, api_key=token)
    # Encode the raw image bytes as base64 for the data URL
    base64_image = base64.b64encode(image).decode("utf-8")
    try:
        # Non-streaming request: the vision answer is returned in one piece
        response = await client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": question},
                        {
                            "type": "image_url",
                            # Label the data URL with the upload's actual MIME
                            # type instead of hard-coding image/jpeg, so PNG
                            # and GIF uploads are described correctly
                            "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}
                        }
                    ]
                }
            ],
            model=model,
            temperature=1.0,
            top_p=1.0,
            stream=False
        )
        return response.choices[0].message.content
    except Exception as err:
        raise HTTPException(status_code=500, detail=f"Vision processing failed: {str(err)}")

@app.post("/generate")
async def generate_response(
prompt: str = Query(..., description="The prompt for the AI"),
model: str = Query("openai/gpt-4.1-mini", description="The model to use for generation")
):
if not prompt:
raise HTTPException(status_code=400, detail="Prompt cannot be empty")
return StreamingResponse(
generate_ai_response(prompt, model),
media_type="text/event-stream"
)
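
# Example invocation (a sketch; assumes the server is running locally on
# port 8000 and GITHUB_TOKEN is set in the environment):
#
#   curl -N -X POST "http://localhost:8000/generate?prompt=Hello&model=openai/gpt-4.1-mini"
#
# The -N flag disables curl's output buffering so tokens appear as they
# stream in.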
@app.post("/process-image")
async def process_image(
image: UploadFile = File(..., description="Image file (PNG, JPEG, GIF)"),
question: str = Form(..., description="Question about the image"),
model: str = Form("openai/gpt-4o", description="Vision-capable model")
):
# Validate image format
if not image.filename.lower().endswith((".png", ".jpg", ".jpeg", ".gif")):
raise HTTPException(status_code=400, detail="Unsupported image format. Use PNG, JPEG, or GIF.")
# Read image content
image_data = await image.read()
# Process image with vision model
response = await process_image_with_vision(image_data, question, model)
return response
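
# Example invocation (a sketch; "photo.png" is a placeholder filename):
#
#   curl -X POST "http://localhost:8000/process-image" \
#        -F "image=@photo.png" \
#        -F "question=What is in this image?" \
#        -F "model=openai/gpt-4o"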
def get_app():
    return app
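
# To run the app locally (assuming this file is saved as main.py):
#
#   uvicorn main:app --host 0.0.0.0 --port 8000
#
# Alternatively, get_app above can serve as an application factory for
# servers launched with uvicorn's --factory flag, e.g.
# "uvicorn main:get_app --factory".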