from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field
from typing import Literal
import os
from functools import lru_cache
from openai import OpenAI

app = FastAPI()

ModelID = Literal[
    "meta-llama/llama-3-70b-instruct",
    "anthropic/claude-3.5-sonnet",
    "deepseek/deepseek-coder",
    "anthropic/claude-3-haiku",
    "openai/gpt-3.5-turbo-instruct",
    "qwen/qwen-72b-chat",
    "google/gemma-2-27b-it"
]

class QueryModel(BaseModel):
    user_query: str = Field(..., description="User's coding query")
    model_id: ModelID = Field(
        default="meta-llama/llama-3-70b-instruct",
        description="ID of the model to use for response generation"
    )

    class Config:
        schema_extra = {
            "example": {
                "user_query": "How do I implement a binary search in Python?",
                "model_id": "meta-llama/llama-3-70b-instruct"
            }
        }

@lru_cache()
def get_api_keys():
    # Cached so the environment is only read once per process.
    return {
        "OPENROUTER_API_KEY": f"sk-or-v1-{os.environ['OPENROUTER_API_KEY']}"
    }


api_keys = get_api_keys()
or_client = OpenAI(api_key=api_keys["OPENROUTER_API_KEY"], base_url="https://openrouter.ai/api/v1")

def chat_with_llama_stream(messages, model, max_output_tokens=4000):
    # Not cached: `messages` is an unhashable list, so @lru_cache would raise
    # a TypeError, and a cached generator would be exhausted after first use.
    try:
        response = or_client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_output_tokens,
            stream=True
        )
        for chunk in response:
            if chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content
    except Exception as e:
        # Once streaming has begun, FastAPI cannot turn this into a clean
        # 500 response; the client simply sees a truncated stream.
        raise HTTPException(status_code=500, detail=f"Error in model response: {str(e)}")

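# --- Alternative error-handling sketch (illustration, not the app's code) ---
# Creating the completion *before* returning a StreamingResponse would let
# OpenRouter failures surface as a normal HTTP 500, since nothing has been
# streamed yet. `start_stream` is a hypothetical helper:
#
#     def start_stream(messages, model, max_output_tokens=4000):
#         response = or_client.chat.completions.create(
#             model=model,
#             messages=messages,
#             max_tokens=max_output_tokens,
#             stream=True,
#         )  # any API error is raised here, before streaming starts
#
#         def gen():
#             for chunk in response:
#                 if chunk.choices[0].delta.content is not None:
#                     yield chunk.choices[0].delta.content
#
#         return gen()
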
@app.post("/coding-assistant")
async def coding_assistant(query: QueryModel):
    """
    Coding assistant endpoint that provides programming help based on user queries.

    Available models:
    - meta-llama/llama-3-70b-instruct (default)
    - anthropic/claude-3.5-sonnet
    - deepseek/deepseek-coder
    - anthropic/claude-3-haiku
    - openai/gpt-3.5-turbo-instruct
    - qwen/qwen-72b-chat
    - google/gemma-2-27b-it
    """
    system_prompt = "You are a helpful assistant proficient in coding tasks. Help the user in understanding and writing code."
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query.user_query}
    ]
    return StreamingResponse(
        chat_with_llama_stream(messages, model=query.model_id),
        media_type="text/event-stream"
    )
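

# --- Usage sketch (illustration only) ---
# A minimal client for the streaming endpoint, assuming the app is served
# locally with `uvicorn main:app --port 8000`; the URL, port, and `requests`
# dependency are assumptions, not part of this app:
#
#     import requests
#
#     with requests.post(
#         "http://localhost:8000/coding-assistant",
#         json={"user_query": "How do I implement a binary search in Python?"},
#         stream=True,
#     ) as resp:
#         resp.raise_for_status()
#         for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
#             print(chunk, end="", flush=True)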