from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field
from typing import Literal
import os
from functools import lru_cache
from openai import OpenAI

app = FastAPI()

ModelID = Literal[
    "meta-llama/llama-3-70b-instruct",
    "anthropic/claude-3.5-sonnet",
    "deepseek/deepseek-coder",
    "anthropic/claude-3-haiku",
    "openai/gpt-3.5-turbo-instruct",
    "qwen/qwen-72b-chat",
    "google/gemma-2-27b-it"
]

class QueryModel(BaseModel):
    user_query: str = Field(..., description="User's coding query")
    model_id: ModelID = Field(
        default="meta-llama/llama-3-70b-instruct",
        description="ID of the model to use for response generation"
    )

    # Pydantic v2 config key (on Pydantic v1 this was `schema_extra` inside a Config class).
    model_config = {
        "json_schema_extra": {
            "example": {
                "user_query": "How do I implement a binary search in Python?",
                "model_id": "meta-llama/llama-3-70b-instruct"
            }
        }
    }

@lru_cache()
def get_api_keys():
    # Assumes OPENROUTER_API_KEY holds the key *without* its "sk-or-v1-" prefix;
    # a missing variable raises KeyError at import time.
    return {
        "OPENROUTER_API_KEY": f"sk-or-v1-{os.environ['OPENROUTER_API_KEY']}"
    }

api_keys = get_api_keys()
or_client = OpenAI(api_key=api_keys["OPENROUTER_API_KEY"], base_url="https://openrouter.ai/api/v1")

def chat_with_llama_stream(messages, model, max_output_tokens=4000):
    # Make the API call eagerly so that request failures can still surface as a
    # normal HTTP 500 before any bytes of the stream are sent. This function
    # must not be memoized: `messages` is an unhashable list, and a cached
    # generator would be exhausted after its first use.
    try:
        response = or_client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_output_tokens,
            stream=True
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error in model response: {str(e)}")

    def stream():
        # Yield only non-empty content deltas. Once streaming begins, the 200
        # status is already committed, so a mid-stream error ends the body.
        for chunk in response:
            if chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

    return stream()

@app.post("/coding-assistant")
async def coding_assistant(query: QueryModel):
    """
    Coding assistant endpoint that provides programming help based on user queries.

    Available models:
    - meta-llama/llama-3-70b-instruct (default)
    - anthropic/claude-3.5-sonnet
    - deepseek/deepseek-coder
    - anthropic/claude-3-haiku
    - openai/gpt-3.5-turbo-instruct
    - qwen/qwen-72b-chat
    - google/gemma-2-27b-it
    """
    system_prompt = "You are a helpful assistant proficient in coding tasks. Help the user understand and write code."
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query.user_query}
    ]

    # Note: chunks are streamed as raw text deltas, not SSE "data:" frames,
    # even though the media type advertises an event stream.
    return StreamingResponse(
        chat_with_llama_stream(messages, model=query.model_id),
        media_type="text/event-stream"
    )
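
# A minimal run-and-call sketch, not part of the endpoint above. Assumptions:
# this file is saved as main.py, uvicorn and requests are installed, and port
# 8000 is free; adjust as needed.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)

# Example streaming client, once the server is up:
#
#   import requests
#
#   with requests.post(
#       "http://localhost:8000/coding-assistant",
#       json={"user_query": "How do I implement a binary search in Python?"},
#       stream=True,
#   ) as resp:
#       for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
#           print(chunk, end="", flush=True)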