import os
from functools import lru_cache
from typing import Literal

from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from openai import OpenAI
from pydantic import BaseModel, Field

app = FastAPI()
# Model IDs routed through OpenRouter; the first entry is the default.
ModelID = Literal[
    "meta-llama/llama-3-70b-instruct",
    "anthropic/claude-3.5-sonnet",
    "deepseek/deepseek-coder",
    "anthropic/claude-3-haiku",
    "openai/gpt-3.5-turbo-instruct",
    "qwen/qwen-72b-chat",
    "google/gemma-2-27b-it",
]
class QueryModel(BaseModel):
    user_query: str = Field(..., description="User's coding query")
    model_id: ModelID = Field(
        default="meta-llama/llama-3-70b-instruct",
        description="ID of the model to use for response generation",
    )

    class Config:
        # Pydantic v1-style example shown in the OpenAPI docs
        # (Pydantic v2 uses `model_config = ConfigDict(json_schema_extra=...)`).
        schema_extra = {
            "example": {
                "user_query": "How do I implement a binary search in Python?",
                "model_id": "meta-llama/llama-3-70b-instruct",
            }
        }
@lru_cache()
def get_api_keys():
    # The OPENROUTER_API_KEY env var is expected to hold the key *without*
    # its "sk-or-v1-" prefix; the prefix is re-attached here.
    return {
        "OPENROUTER_API_KEY": f"sk-or-v1-{os.environ['OPENROUTER_API_KEY']}"
    }

api_keys = get_api_keys()
or_client = OpenAI(
    api_key=api_keys["OPENROUTER_API_KEY"],
    base_url="https://openrouter.ai/api/v1",
)
def chat_with_llama_stream(messages, model, max_output_tokens=4000):
    # This is a generator, so it must not be wrapped in @lru_cache:
    # generators are single-use, and the `messages` list is unhashable anyway.
    try:
        response = or_client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_output_tokens,
            stream=True,
        )
        for chunk in response:
            if chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content
    except Exception as e:
        # If streaming has already begun, the 200 status is committed; the
        # client will simply see the connection end early.
        raise HTTPException(status_code=500, detail=f"Error in model response: {str(e)}")
@app.post("/coding-assistant")
async def coding_assistant(query: QueryModel):
    """
    Coding assistant endpoint that provides programming help based on user queries.

    Available models:
    - meta-llama/llama-3-70b-instruct (default)
    - anthropic/claude-3.5-sonnet
    - deepseek/deepseek-coder
    - anthropic/claude-3-haiku
    - openai/gpt-3.5-turbo-instruct
    - qwen/qwen-72b-chat
    - google/gemma-2-27b-it
    """
    system_prompt = (
        "You are a helpful assistant proficient in coding tasks. "
        "Help the user in understanding and writing code."
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query.user_query},
    ]
    return StreamingResponse(
        chat_with_llama_stream(messages, model=query.model_id),
        media_type="text/event-stream",
    )