Upload 10 files
- Dockerfile +20 -0
- TYPEGPT/typegpt.py +409 -0
- TYPEGPT/typegpt_normal.py +231 -0
- api_info.py +176 -0
- fastapi_app.py +132 -0
- flask_app.py +131 -0
- query.md +384 -0
- requirements.txt +7 -0
- typegpt_api.py +254 -0
- usage_inference.py +158 -0
Dockerfile
ADDED
@@ -0,0 +1,20 @@
+# Use an official Python runtime as the base image
+FROM python:3.9-slim
+
+# Set the working directory in the container
+WORKDIR /app
+
+# Copy the requirements file into the container
+COPY requirements.txt .
+
+# Install the required packages
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the rest of the application code into the container
+COPY . .
+
+# Expose the port that FastAPI will run on
+EXPOSE 7860
+
+# Command to run the FastAPI application (the `app` object is defined in fastapi_app.py; no main.py is included in this commit)
+CMD ["uvicorn", "fastapi_app:app", "--host", "0.0.0.0", "--port", "7860"]
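For a local smoke test of this image (assuming a standard Docker CLI; the image tag is a placeholder): `docker build -t typegpt-api .` followed by `docker run -p 7860:7860 typegpt-api`. Port 7860 matches the EXPOSE line and the default app port for Docker-based Hugging Face Spaces.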
TYPEGPT/typegpt.py
ADDED
@@ -0,0 +1,409 @@
+import requests
+import json
+from typing import *
+
+from webscout.AIutel import Optimizers
+from webscout.AIutel import Conversation
+from webscout.AIutel import AwesomePrompts
+from webscout.AIbase import Provider
+from webscout import exceptions
+
+class TypeGPT(Provider):
+    """
+    A class to interact with the TypeGPT.net API. Improved to match webscout standards.
+    """
+    url = "https://chat.typegpt.net"
+    working = True
+    supports_message_history = True
+
+    models = [
+        # OpenAI Models
+        "gpt-3.5-turbo",
+        "gpt-3.5-turbo-202201",
+        "gpt-4o",
+        "gpt-4o-2024-05-13",
+        "o1-preview",
+
+        # Claude Models
+        "claude",
+        "claude-3-5-sonnet",
+        "claude-sonnet-3.5",
+        "claude-3-5-sonnet-20240620",
+
+        # Meta/LLaMA Models
+        "@cf/meta/llama-2-7b-chat-fp16",
+        "@cf/meta/llama-2-7b-chat-int8",
+        "@cf/meta/llama-3-8b-instruct",
+        "@cf/meta/llama-3.1-8b-instruct",
+        "@cf/meta-llama/llama-2-7b-chat-hf-lora",
+        "llama-3.1-405b",
+        "llama-3.1-70b",
+        "llama-3.1-8b",
+        "meta-llama/Llama-2-7b-chat-hf",
+        "meta-llama/Llama-3.1-70B-Instruct",
+        "meta-llama/Llama-3.1-8B-Instruct",
+        "meta-llama/Llama-3.2-11B-Vision-Instruct",
+        "meta-llama/Llama-3.2-1B-Instruct",
+        "meta-llama/Llama-3.2-3B-Instruct",
+        "meta-llama/Llama-3.2-90B-Vision-Instruct",
+        "meta-llama/Llama-Guard-3-8B",
+        "meta-llama/Meta-Llama-3-70B-Instruct",
+        "meta-llama/Meta-Llama-3-8B-Instruct",
+        "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
+        "meta-llama/Meta-Llama-3.1-8B-Instruct",
+        "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+
+        # Mistral Models
+        "mistral",
+        "mistral-large",
+        "@cf/mistral/mistral-7b-instruct-v0.1",
+        "@cf/mistral/mistral-7b-instruct-v0.2-lora",
+        "@hf/mistralai/mistral-7b-instruct-v0.2",
+        "mistralai/Mistral-7B-Instruct-v0.2",
+        "mistralai/Mistral-7B-Instruct-v0.3",
+        "mistralai/Mixtral-8x22B-Instruct-v0.1",
+        "mistralai/Mixtral-8x7B-Instruct-v0.1",
+
+        # Qwen Models
+        "@cf/qwen/qwen1.5-0.5b-chat",
+        "@cf/qwen/qwen1.5-1.8b-chat",
+        "@cf/qwen/qwen1.5-7b-chat-awq",
+        "@cf/qwen/qwen1.5-14b-chat-awq",
+        "Qwen/Qwen2.5-3B-Instruct",
+        "Qwen/Qwen2.5-72B-Instruct",
+        "Qwen/Qwen2.5-Coder-32B-Instruct",
+
+        # Google/Gemini Models
+        "@cf/google/gemma-2b-it-lora",
+        "@cf/google/gemma-7b-it-lora",
+        "@hf/google/gemma-7b-it",
+        "google/gemma-1.1-2b-it",
+        "google/gemma-1.1-7b-it",
+        "gemini-pro",
+        "gemini-1.5-pro",
+        "gemini-1.5-pro-latest",
+        "gemini-1.5-flash",
+
+        # Cohere Models
+        "c4ai-aya-23-35b",
+        "c4ai-aya-23-8b",
+        "command",
+        "command-light",
+        "command-light-nightly",
+        "command-nightly",
+        "command-r",
+        "command-r-08-2024",
+        "command-r-plus",
+        "command-r-plus-08-2024",
+        "rerank-english-v2.0",
+        "rerank-english-v3.0",
+        "rerank-multilingual-v2.0",
+        "rerank-multilingual-v3.0",
+
+        # Microsoft Models
+        "@cf/microsoft/phi-2",
+        "microsoft/DialoGPT-medium",
+        "microsoft/Phi-3-medium-4k-instruct",
+        "microsoft/Phi-3-mini-4k-instruct",
+        "microsoft/Phi-3.5-mini-instruct",
+        "microsoft/WizardLM-2-8x22B",
+
+        # Yi Models
+        "01-ai/Yi-1.5-34B-Chat",
+        "01-ai/Yi-34B-Chat",
+
+        # Specialized Models and Tools
+        "@cf/deepseek-ai/deepseek-math-7b-base",
+        "@cf/deepseek-ai/deepseek-math-7b-instruct",
+        "@cf/defog/sqlcoder-7b-2",
+        "@cf/openchat/openchat-3.5-0106",
+        "@cf/thebloke/discolm-german-7b-v1-awq",
+        "@cf/tiiuae/falcon-7b-instruct",
+        "@cf/tinyllama/tinyllama-1.1b-chat-v1.0",
+        "@hf/nexusflow/starling-lm-7b-beta",
+        "@hf/nousresearch/hermes-2-pro-mistral-7b",
+        "@hf/thebloke/deepseek-coder-6.7b-base-awq",
+        "@hf/thebloke/deepseek-coder-6.7b-instruct-awq",
+        "@hf/thebloke/llama-2-13b-chat-awq",
+        "@hf/thebloke/llamaguard-7b-awq",
+        "@hf/thebloke/neural-chat-7b-v3-1-awq",
+        "@hf/thebloke/openhermes-2.5-mistral-7b-awq",
+        "@hf/thebloke/zephyr-7b-beta-awq",
+        "AndroidDeveloper",
+        "AngularJSAgent",
+        "AzureAgent",
+        "BitbucketAgent",
+        "DigitalOceanAgent",
+        "DockerAgent",
+        "ElectronAgent",
+        "ErlangAgent",
+        "FastAPIAgent",
+        "FirebaseAgent",
+        "FlaskAgent",
+        "FlutterAgent",
+        "GitAgent",
+        "GitlabAgent",
+        "GoAgent",
+        "GodotAgent",
+        "GoogleCloudAgent",
+        "HTMLAgent",
+        "HerokuAgent",
+        "ImageGeneration",
+        "JavaAgent",
+        "JavaScriptAgent",
+        "MongoDBAgent",
+        "Next.jsAgent",
+        "PyTorchAgent",
+        "PythonAgent",
+        "ReactAgent",
+        "RepoMap",
+        "SwiftDeveloper",
+        "XcodeAgent",
+        "YoutubeAgent",
+        "blackboxai",
+        "blackboxai-pro",
+        "builderAgent",
+        "dify",
+        "flux",
+        "openchat/openchat-3.6-8b",
+        "rtist",
+        "searchgpt",
+        "sur",
+        "sur-mistral",
+        "unity"
+    ]
+
+    def __init__(
+        self,
+        is_conversation: bool = True,
+        max_tokens: int = 4000,  # Set a reasonable default
+        timeout: int = 30,
+        intro: str = None,
+        filepath: str = None,
+        update_file: bool = True,
+        proxies: dict = {},
+        history_offset: int = 10250,
+        act: str = None,
+        model: str = "claude-3-5-sonnet-20240620",
+        system_prompt: str = "You are a helpful assistant.",
+        temperature: float = 0.5,
+        presence_penalty: int = 0,
+        frequency_penalty: int = 0,
+        top_p: float = 1,
+    ):
+        """Initializes the TypeGPT API client."""
+        if model not in self.models:
+            raise ValueError(f"Invalid model: {model}. Choose from: {', '.join(self.models)}")
+
+        self.session = requests.Session()
+        self.is_conversation = is_conversation
+        self.max_tokens_to_sample = max_tokens
+        self.api_endpoint = "https://chat.typegpt.net/api/openai/v1/chat/completions"
+        self.timeout = timeout
+        self.last_response = {}
+        self.last_response_status_code = None  # Added line for status code
+        self.model = model
+        self.system_prompt = system_prompt
+        self.temperature = temperature
+        self.presence_penalty = presence_penalty
+        self.frequency_penalty = frequency_penalty
+        self.top_p = top_p
+
+        self.headers = {
+            "authority": "chat.typegpt.net",
+            "accept": "application/json, text/event-stream",
+            "accept-language": "en-US,en;q=0.9",
+            "content-type": "application/json",
+            "origin": "https://chat.typegpt.net",
+            "referer": "https://chat.typegpt.net/",
+            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
+        }
+
+        # A tuple rather than a one-shot generator, so membership checks can run more than once
+        self.__available_optimizers = tuple(
+            method
+            for method in dir(Optimizers)
+            if callable(getattr(Optimizers, method)) and not method.startswith("__")
+        )
+        Conversation.intro = (
+            AwesomePrompts().get_act(
+                act, raise_not_found=True, default=None, case_insensitive=True
+            )
+            if act
+            else intro or Conversation.intro
+        )
+        self.conversation = Conversation(
+            is_conversation, self.max_tokens_to_sample, filepath, update_file
+        )
+        self.conversation.history_offset = history_offset
+        self.session.proxies = proxies
+
+    def ask(
+        self,
+        prompt: str,
+        stream: bool = False,
+        raw: bool = False,
+        optimizer: str = None,
+        conversationally: bool = False,
+    ) -> Dict[str, Any] | Generator:
+        """Sends a prompt to the TypeGPT.net API and returns the response."""
+        conversation_prompt = self.conversation.gen_complete_prompt(prompt)
+        if optimizer:
+            if optimizer in self.__available_optimizers:
+                conversation_prompt = getattr(Optimizers, optimizer)(
+                    conversation_prompt if conversationally else prompt
+                )
+            else:
+                raise exceptions.FailedToGenerateResponseError(
+                    f"Optimizer is not one of {self.__available_optimizers}"
+                )
+
+        payload = {
+            "messages": [
+                {"role": "system", "content": self.system_prompt},
+                {"role": "user", "content": conversation_prompt}
+            ],
+            "stream": stream,
+            "model": self.model,
+            "temperature": self.temperature,
+            "presence_penalty": self.presence_penalty,
+            "frequency_penalty": self.frequency_penalty,
+            "top_p": self.top_p,
+            "max_tokens": self.max_tokens_to_sample,
+        }
+
+        def for_stream():
+            response = self.session.post(
+                self.api_endpoint, headers=self.headers, json=payload, stream=True, timeout=self.timeout
+            )
+            self.last_response_status_code = response.status_code  # Capture status code
+            if not response.ok:
+                raise exceptions.FailedToGenerateResponseError(
+                    f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}"
+                )
+            message_load = ""
+            for line in response.iter_lines():
+                if line:
+                    line = line.decode("utf-8")
+                    if line.startswith("data: "):
+                        line = line[6:]  # Remove "data: " prefix
+                        # Skip [DONE] message
+                        if line.strip() == "[DONE]":
+                            break
+
+                        try:
+                            data = json.loads(line)
+
+                            # Extract and yield only new content
+                            if 'choices' in data and len(data['choices']) > 0:
+                                delta = data['choices'][0].get('delta', {})
+                                if 'content' in delta:
+                                    new_content = delta['content']
+                                    message_load += new_content
+                                    # Yield only the new content
+                                    yield dict(text=new_content) if not raw else new_content
+                                    self.last_response = dict(text=message_load)
+
+                        except json.JSONDecodeError:
+                            continue
+            self.conversation.update_chat_history(prompt, self.get_message(self.last_response))
+
+        def for_non_stream():
+            response = self.session.post(self.api_endpoint, headers=self.headers, json=payload, timeout=self.timeout)
+            self.last_response_status_code = response.status_code  # Capture status code
+            if not response.ok:
+                raise exceptions.FailedToGenerateResponseError(
+                    f"Request failed - {response.status_code}: {response.text}"
+                )
+            self.last_response = response.json()
+            self.conversation.update_chat_history(prompt, self.get_message(self.last_response))
+            return self.last_response
+
+        return for_stream() if stream else for_non_stream()
+
+    def chat(
+        self,
+        prompt: str,
+        stream: bool = False,
+        optimizer: str = None,
+        conversationally: bool = False,
+    ) -> str | Generator[str, None, None]:
+        """Generate response `str` or stream."""
+        def for_stream():
+            for chunk in self.ask(
+                prompt, stream=True, optimizer=optimizer, conversationally=conversationally
+            ):
+                yield self.get_message(chunk)  # Extract text from streamed chunks
+
+        # The yield lives in a nested function so chat() itself is not a
+        # generator and genuinely returns a string when stream=False.
+        if stream:
+            return for_stream()
+        return self.get_message(
+            self.ask(prompt, stream=False, optimizer=optimizer, conversationally=conversationally)
+        )
+
+    def get_message(self, response: Dict[str, Any]) -> str:
+        """Retrieves message from response."""
+        if isinstance(response, str):  # Handle raw responses
+            return response
+        elif isinstance(response, dict):
+            if "text" in response:
+                return response["text"]  # Streamed chunks carry their text here
+            # Non-streamed responses are full OpenAI-style payloads
+            return response.get("choices", [{}])[0].get("message", {}).get("content", "")
+        else:
+            raise TypeError("Invalid response type. Expected str or dict.")
+
+if __name__ == "__main__":
+    from rich import print
+    from rich.progress import Progress, BarColumn, TextColumn, TimeRemainingColumn, SpinnerColumn
+    from rich.console import Console
+    from rich.table import Table
+    import concurrent.futures
+
+    def make_api_call(thread_number, results):
+        ai = TypeGPT()
+        try:
+            ai.ask("Test message", stream=False)
+            status_code = ai.last_response_status_code
+            results[thread_number] = status_code
+        except Exception as e:
+            results[thread_number] = str(e)
+
+    results = {}
+    total_requests = 100
+
+    console = Console()
+
+    print("[bold magenta]Starting API Load Test with 100 simultaneous requests...[/bold magenta]\n")
+
+    with Progress(
+        SpinnerColumn(),
+        "[progress.description]{task.description}",
+        BarColumn(bar_width=None),
+        "[progress.percentage]{task.percentage:>3.0f}%",
+        TimeRemainingColumn(),
+        console=console,
+    ) as progress:
+        task = progress.add_task("[cyan]Sending API Requests...", total=total_requests)
+        with concurrent.futures.ThreadPoolExecutor(max_workers=total_requests) as executor:
+            futures = {
+                executor.submit(make_api_call, i, results): i for i in range(total_requests)
+            }
+            for future in concurrent.futures.as_completed(futures):
+                progress.update(task, advance=1)
+        progress.stop()
+
+    # Process and display the results
+    successful_calls = sum(1 for status in results.values() if status == 200)
+    failed_calls = total_requests - successful_calls
+
+    print("\n[bold magenta]API Load Test Results:[/bold magenta]\n")
+    print(f"[bold green]Successful calls: {successful_calls}")
+    print(f"[bold red]Failed calls: {failed_calls}\n")
+
+    # Create a table to display detailed results
+    table = Table(show_header=True, header_style="bold blue")
+    table.add_column("Thread Number", justify="right", style="dim")
+    table.add_column("Status", style="bold")
+
+    for thread_number, status in results.items():
+        if status == 200:
+            table.add_row(f"{thread_number}", f"[green]Success[/green]")
+        else:
+            table.add_row(f"{thread_number}", f"[red]Failed ({status})[/red]")
+
+    print(table)
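For reference, a minimal usage sketch of the provider above; it assumes the `webscout` package (which supplies `Optimizers`, `Conversation`, `AwesomePrompts`, and `Provider`) is installed and that this file is importable as `typegpt`:

```python
# Minimal usage sketch for the TypeGPT provider above.
from typegpt import TypeGPT

ai = TypeGPT(model="gpt-3.5-turbo", timeout=60)

# Non-streaming: chat() returns the whole reply as a string.
print(ai.chat("Say hello in one short sentence."))

# Streaming: chat(stream=True) yields text chunks as they arrive.
for chunk in ai.chat("Count from 1 to 5.", stream=True):
    print(chunk, end="", flush=True)
```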
TYPEGPT/typegpt_normal.py
ADDED
@@ -0,0 +1,231 @@
+import requests
+import json
+
+# List of available models
+models = [
+    # OpenAI Models
+    "gpt-3.5-turbo",
+    "gpt-3.5-turbo-202201",
+    "gpt-4o",
+    "gpt-4o-2024-05-13",
+    "o1-preview",
+
+    # Claude Models
+    "claude",
+    "claude-3-5-sonnet",
+    "claude-sonnet-3.5",
+    "claude-3-5-sonnet-20240620",
+
+    # Meta/LLaMA Models
+    "@cf/meta/llama-2-7b-chat-fp16",
+    "@cf/meta/llama-2-7b-chat-int8",
+    "@cf/meta/llama-3-8b-instruct",
+    "@cf/meta/llama-3.1-8b-instruct",
+    "@cf/meta-llama/llama-2-7b-chat-hf-lora",
+    "llama-3.1-405b",
+    "llama-3.1-70b",
+    "llama-3.1-8b",
+    "meta-llama/Llama-2-7b-chat-hf",
+    "meta-llama/Llama-3.1-70B-Instruct",
+    "meta-llama/Llama-3.1-8B-Instruct",
+    "meta-llama/Llama-3.2-11B-Vision-Instruct",
+    "meta-llama/Llama-3.2-1B-Instruct",
+    "meta-llama/Llama-3.2-3B-Instruct",
+    "meta-llama/Llama-3.2-90B-Vision-Instruct",
+    "meta-llama/Llama-Guard-3-8B",
+    "meta-llama/Meta-Llama-3-70B-Instruct",
+    "meta-llama/Meta-Llama-3-8B-Instruct",
+    "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
+    "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+
+    # Mistral Models
+    "mistral",
+    "mistral-large",
+    "@cf/mistral/mistral-7b-instruct-v0.1",
+    "@cf/mistral/mistral-7b-instruct-v0.2-lora",
+    "@hf/mistralai/mistral-7b-instruct-v0.2",
+    "mistralai/Mistral-7B-Instruct-v0.2",
+    "mistralai/Mistral-7B-Instruct-v0.3",
+    "mistralai/Mixtral-8x22B-Instruct-v0.1",
+    "mistralai/Mixtral-8x7B-Instruct-v0.1",
+
+    # Qwen Models
+    "@cf/qwen/qwen1.5-0.5b-chat",
+    "@cf/qwen/qwen1.5-1.8b-chat",
+    "@cf/qwen/qwen1.5-7b-chat-awq",
+    "@cf/qwen/qwen1.5-14b-chat-awq",
+    "Qwen/Qwen2.5-3B-Instruct",
+    "Qwen/Qwen2.5-72B-Instruct",
+    "Qwen/Qwen2.5-Coder-32B-Instruct",
+
+    # Google/Gemini Models
+    "@cf/google/gemma-2b-it-lora",
+    "@cf/google/gemma-7b-it-lora",
+    "@hf/google/gemma-7b-it",
+    "google/gemma-1.1-2b-it",
+    "google/gemma-1.1-7b-it",
+    "gemini-pro",
+    "gemini-1.5-pro",
+    "gemini-1.5-pro-latest",
+    "gemini-1.5-flash",
+
+    # Cohere Models
+    "c4ai-aya-23-35b",
+    "c4ai-aya-23-8b",
+    "command",
+    "command-light",
+    "command-light-nightly",
+    "command-nightly",
+    "command-r",
+    "command-r-08-2024",
+    "command-r-plus",
+    "command-r-plus-08-2024",
+    "rerank-english-v2.0",
+    "rerank-english-v3.0",
+    "rerank-multilingual-v2.0",
+    "rerank-multilingual-v3.0",
+
+    # Microsoft Models
+    "@cf/microsoft/phi-2",
+    "microsoft/DialoGPT-medium",
+    "microsoft/Phi-3-medium-4k-instruct",
+    "microsoft/Phi-3-mini-4k-instruct",
+    "microsoft/Phi-3.5-mini-instruct",
+    "microsoft/WizardLM-2-8x22B",
+
+    # Yi Models
+    "01-ai/Yi-1.5-34B-Chat",
+    "01-ai/Yi-34B-Chat",
+
+    # Specialized Models and Tools
+    "@cf/deepseek-ai/deepseek-math-7b-base",
+    "@cf/deepseek-ai/deepseek-math-7b-instruct",
+    "@cf/defog/sqlcoder-7b-2",
+    "@cf/openchat/openchat-3.5-0106",
+    "@cf/thebloke/discolm-german-7b-v1-awq",
+    "@cf/tiiuae/falcon-7b-instruct",
+    "@cf/tinyllama/tinyllama-1.1b-chat-v1.0",
+    "@hf/nexusflow/starling-lm-7b-beta",
+    "@hf/nousresearch/hermes-2-pro-mistral-7b",
+    "@hf/thebloke/deepseek-coder-6.7b-base-awq",
+    "@hf/thebloke/deepseek-coder-6.7b-instruct-awq",
+    "@hf/thebloke/llama-2-13b-chat-awq",
+    "@hf/thebloke/llamaguard-7b-awq",
+    "@hf/thebloke/neural-chat-7b-v3-1-awq",
+    "@hf/thebloke/openhermes-2.5-mistral-7b-awq",
+    "@hf/thebloke/zephyr-7b-beta-awq",
+    "AndroidDeveloper",
+    "AngularJSAgent",
+    "AzureAgent",
+    "BitbucketAgent",
+    "DigitalOceanAgent",
+    "DockerAgent",
+    "ElectronAgent",
+    "ErlangAgent",
+    "FastAPIAgent",
+    "FirebaseAgent",
+    "FlaskAgent",
+    "FlutterAgent",
+    "GitAgent",
+    "GitlabAgent",
+    "GoAgent",
+    "GodotAgent",
+    "GoogleCloudAgent",
+    "HTMLAgent",
+    "HerokuAgent",
+    "ImageGeneration",
+    "JavaAgent",
+    "JavaScriptAgent",
+    "MongoDBAgent",
+    "Next.jsAgent",
+    "PyTorchAgent",
+    "PythonAgent",
+    "ReactAgent",
+    "RepoMap",
+    "SwiftDeveloper",
+    "XcodeAgent",
+    "YoutubeAgent",
+    "blackboxai",
+    "blackboxai-pro",
+    "builderAgent",
+    "dify",
+    "flux",
+    "openchat/openchat-3.6-8b",
+    "rtist",
+    "searchgpt",
+    "sur",
+    "sur-mistral",
+    "unity"
+]
+
+# Parameters
+is_conversation = True
+max_tokens = 4000  # Set a reasonable default
+timeout = 30
+model = "claude-3-5-sonnet-20240620"
+system_prompt = "You are a helpful assistant."
+temperature = 0.5
+presence_penalty = 0
+frequency_penalty = 0
+top_p = 1
+
+if model not in models:
+    raise ValueError(f"Invalid model: {model}. Choose from: {', '.join(models)}")
+
+session = requests.Session()
+api_endpoint = "https://chat.typegpt.net/api/openai/v1/chat/completions"
+
+headers = {
+    "authority": "chat.typegpt.net",
+    "accept": "application/json, text/event-stream",
+    "accept-language": "en-US,en;q=0.9",
+    "content-type": "application/json",
+    "origin": "https://chat.typegpt.net",
+    "referer": "https://chat.typegpt.net/",
+    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
+}
+
+# Prompt to send
+prompt = "What is the knowledge cut off? Be specific and also specify the month, year and date. If not sure, then provide approximate"
+
+# Payload
+payload = {
+    "messages": [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": prompt}
+    ],
+    "stream": True,
+    "model": model,
+    "temperature": temperature,
+    "presence_penalty": presence_penalty,
+    "frequency_penalty": frequency_penalty,
+    "top_p": top_p,
+    "max_tokens": max_tokens,
+}
+
+# Make the API request
+response = session.post(
+    api_endpoint, headers=headers, json=payload, stream=True, timeout=timeout
+)
+
+if not response.ok:
+    raise Exception(f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}")
+
+# Process the streamed response
+for line in response.iter_lines():
+    if line:
+        line = line.decode("utf-8")
+        if line.startswith("data: "):
+            line = line[6:]  # Remove "data: " prefix
+            if line.strip() == "[DONE]":
+                break
+            try:
+                data = json.loads(line)
+                if 'choices' in data and len(data['choices']) > 0:
+                    delta = data['choices'][0].get('delta', {})
+                    if 'content' in delta:
+                        new_content = delta['content']
+                        print(new_content, end="", flush=True)
+            except json.JSONDecodeError:
+                continue
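The script above always requests a streamed response. A sketch of the non-streaming variant, reusing the names defined in the script and assuming the endpoint returns an OpenAI-style completion object when `stream` is false (the same assumption query.md below makes):

```python
# Non-streaming variant: reuses session, api_endpoint, headers, payload,
# and timeout from the script above.
payload["stream"] = False
resp = session.post(api_endpoint, headers=headers, json=payload, timeout=timeout)
resp.raise_for_status()
# Assumption: an OpenAI-style body with choices[0].message.content.
print(resp.json()["choices"][0]["message"]["content"])
```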
api_info.py
ADDED
@@ -0,0 +1,176 @@
+developer_info = {
+    'developer': 'Devs Do Code',
+    'contact': {
+        'Telegram': 'https://t.me/devsdocode',
+        'YouTube Channel': 'https://www.youtube.com/@DevsDoCode',
+        'LinkedIn': 'https://www.linkedin.com/in/developer-sreejan/',
+        'Discord Server': 'https://discord.gg/ehwfVtsAts',
+        'Instagram': {
+            'Personal': 'https://www.instagram.com/sree.shades_/',
+            'Channel': 'https://www.instagram.com/devsdocode_/'
+        }
+    }
+}
+
+endpoint = {
+    'route': "/generate",
+    'params': {
+        "query": "[SEARCH QUERY]"
+    },
+    'optional_params': {
+        "model": "[]",
+        "temperature": "[]",
+        "system_prompt": "[]"
+    },
+    'url_demo': '/generate?query=Who is Devs Do Code&&model=command-r-plus&&temperature=0.7&&system_prompt=Your Owner is "Devs Do Code"'
+}
+
+model_providers = {
+    "OpenAI": {
+        "models": [
+            "gpt-3.5-turbo",
+            "gpt-3.5-turbo-202201",
+            "gpt-4o",
+            "gpt-4o-2024-05-13",
+            "o1-preview"
+        ],
+        "description": "OpenAI's GPT language models"
+    },
+    "Anthropic": {
+        "models": [
+            "claude",
+            "claude-3-5-sonnet",
+            "claude-sonnet-3.5",
+            "claude-3-5-sonnet-20240620"
+        ],
+        "description": "Anthropic's Claude language models"
+    },
+    "Meta": {
+        "models": [
+            "llama-2-7b-chat",
+            "llama-2-7b-chat-int8",
+            "llama-3-8b-instruct",
+            "llama-3.1-8b-instruct",
+            "llama-3.1-405b",
+            "llama-3.1-70b",
+            "llama-3.1-8b",
+            "llama-3.2-11b-vision",
+            "llama-3.2-1b",
+            "llama-3.2-3b",
+            "llama-3.2-90b-vision",
+            "llama-guard-3-8b"
+        ],
+        "description": "Meta's LLaMA language models"
+    },
+    "Mistral": {
+        "models": [
+            "mistral",
+            "mistral-large",
+            "mistral-7b-instruct-v0.1",
+            "mistral-7b-instruct-v0.2",
+            "mistral-7b-instruct-v0.3",
+            "mixtral-8x22b",
+            "mixtral-8x7b"
+        ],
+        "description": "Mistral AI's language models"
+    },
+    "Qwen": {
+        "models": [
+            "qwen1.5-0.5b-chat",
+            "qwen1.5-1.8b-chat",
+            "qwen1.5-7b-chat",
+            "qwen1.5-14b-chat",
+            "qwen2.5-3b",
+            "qwen2.5-72b",
+            "qwen2.5-coder-32b"
+        ],
+        "description": "Qwen's language models"
+    },
+    "Google": {
+        "models": [
+            "gemma-2b",
+            "gemma-7b",
+            "gemini-pro",
+            "gemini-1.5-pro",
+            "gemini-1.5-pro-latest",
+            "gemini-1.5-flash"
+        ],
+        "description": "Google's Gemini and Gemma models"
+    },
+    "Cohere": {
+        "models": [
+            "aya-23-35b",
+            "aya-23-8b",
+            "command",
+            "command-light",
+            "command-nightly",
+            "command-r",
+            "command-r-plus",
+            "rerank-english-v2.0",
+            "rerank-english-v3.0",
+            "rerank-multilingual-v2.0",
+            "rerank-multilingual-v3.0"
+        ],
+        "description": "Cohere's language models"
+    },
+    "Microsoft": {
+        "models": [
+            "phi-2",
+            "dialogpt-medium",
+            "phi-3-medium-4k",
+            "phi-3-mini-4k",
+            "phi-3.5-mini",
+            "wizardlm-2-8x22b"
+        ],
+        "description": "Microsoft's language models"
+    },
+    "Yi": {
+        "models": [
+            "yi-1.5-34b-chat",
+            "yi-34b-chat"
+        ],
+        "description": "01.AI's Yi language models"
+    }
+}
+
+error_message = {
+    'developer_contact': {
+        'Telegram': 'https://t.me/DevsDoCode',
+        'Instagram': 'https://www.instagram.com/sree.shades_/',
+        'Discord': 'https://discord.gg/ehwfVtsAts',
+        'LinkedIn': 'https://www.linkedin.com/in/developer-sreejan/',
+        'Twitter': 'https://twitter.com/Anand_Sreejan'
+    },
+    'error': 'Oops! Something went wrong. Please contact the developer Devs Do Code.'
+}
+
+default_info = """This API is developed and being maintained by Devs Do Code (Sreejan).
+
+**About the Developer**
+
+Sreejan, a high school student from Patna, Bihar, India, has emerged as a notable figure in the technology sector.
+His creation of an API is a testament to his dedication and expertise. Despite his youth, Sreejan's contributions
+to artificial intelligence and machine learning are significant. As an AI & ML Engineer, he specializes in Deep Learning,
+Natural Language Processing (NLP), and Robotics, with proficiency in Python, Java, and Mobile App Development.
+Beyond his role as a technology consumer, Sreejan is an active open-source contributor, notably to projects like Hugging Face.
+
+He is also recognized for his role in community development, particularly through "Devs Do Code," a platform he
+founded to provide quality coding resources, tutorials, and projects. His mission is to equip developers with the
+necessary skills to thrive in the ever-evolving tech landscape. Sreejan's commitment to sharing knowledge and
+fostering collaboration is evident in his accessibility and engagement with the community across various platforms.
+
+Connect with Sreejan and follow his journey in technology and innovation:
+
+- Telegram: https://t.me/devsdocode
+- YouTube Channel: https://www.youtube.com/@DevsDoCode
+- LinkedIn: https://www.linkedin.com/in/developer-sreejan/
+- Discord Server: https://discord.gg/ehwfVtsAts
+- Instagram
+  - Personal: https://www.instagram.com/sree.shades_/
+  - Channel: https://www.instagram.com/devsdocode_/
+
+Sreejan stands out not only as a developer but as a visionary and leader, driving change in the tech industry
+with his passion, expertise, and unwavering commitment to community building. He continues to shape the
+future of technology, one line of code at a time.
+"""
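The `endpoint` dict above documents a `/generate` route. A hypothetical client sketch mirroring `url_demo`; the base URL is a placeholder, and note that neither fastapi_app.py nor flask_app.py in this commit registers this route. Letting `requests` build the query string also handles the URL-encoding that the raw `url_demo` string (with its doubled `&&` separators) glosses over:

```python
# Hypothetical client for the documented /generate route.
import requests

BASE_URL = "http://localhost:7860"  # placeholder for wherever the API is deployed
params = {
    "query": "Who is Devs Do Code",                   # required, per endpoint["params"]
    "model": "command-r-plus",                        # optional
    "temperature": 0.7,                               # optional
    "system_prompt": 'Your Owner is "Devs Do Code"',  # optional
}
resp = requests.get(f"{BASE_URL}/generate", params=params)
print(resp.json())
```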
fastapi_app.py
ADDED
@@ -0,0 +1,132 @@
+from fastapi import FastAPI, Request, Response
+from fastapi.responses import JSONResponse, StreamingResponse
+from fastapi.middleware.cors import CORSMiddleware
+import uvicorn
+import json
+
+from typegpt_api import generate, model_mapping, simplified_models
+from api_info import developer_info, model_providers
+
+app = FastAPI()
+
+# Set up CORS middleware if needed
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+@app.get("/health_check")
+async def health_check():
+    return {"status": "OK"}
+
+@app.get("/models")
+async def get_models():
+    try:
+        response = {
+            "object": "list",
+            "data": []
+        }
+        for provider, info in model_providers.items():
+            for model in info["models"]:
+                response["data"].append({
+                    "id": model,
+                    "object": "model",
+                    "provider": provider,
+                    "description": info["description"]
+                })
+
+        return JSONResponse(content=response)
+    except Exception as e:
+        return JSONResponse(content={"error": str(e)}, status_code=500)
+
+@app.post("/chat/completions")
+async def chat_completions(request: Request):
+    # Receive the JSON payload
+    try:
+        body = await request.json()
+    except Exception as e:
+        return JSONResponse(content={"error": "Invalid JSON payload"}, status_code=400)
+
+    # Extract parameters
+    model = body.get("model")
+    messages = body.get("messages")
+    temperature = body.get("temperature", 0.7)
+    top_p = body.get("top_p", 1.0)
+    n = body.get("n", 1)
+    stream = body.get("stream", False)
+    stop = body.get("stop")
+    max_tokens = body.get("max_tokens")
+    presence_penalty = body.get("presence_penalty", 0.0)
+    frequency_penalty = body.get("frequency_penalty", 0.0)
+    logit_bias = body.get("logit_bias")
+    user = body.get("user")
+    timeout = 30  # or set based on your preference
+
+    # Validate required parameters
+    if not model:
+        return JSONResponse(content={"error": "The 'model' parameter is required."}, status_code=400)
+    if not messages:
+        return JSONResponse(content={"error": "The 'messages' parameter is required."}, status_code=400)
+
+    # Call the generate function
+    try:
+        if stream:
+            async def generate_stream():
+                response = generate(
+                    model=model,
+                    messages=messages,
+                    temperature=temperature,
+                    top_p=top_p,
+                    n=n,
+                    stream=True,
+                    stop=stop,
+                    max_tokens=max_tokens,
+                    presence_penalty=presence_penalty,
+                    frequency_penalty=frequency_penalty,
+                    logit_bias=logit_bias,
+                    user=user,
+                    timeout=timeout,
+                )
+
+                for chunk in response:
+                    yield f"data: {json.dumps(chunk)}\n\n"
+                yield "data: [DONE]\n\n"
+
+            return StreamingResponse(
+                generate_stream(),
+                media_type="text/event-stream",
+                headers={
+                    "Cache-Control": "no-cache",
+                    "Connection": "keep-alive",
+                    "Transfer-Encoding": "chunked"
+                }
+            )
+        else:
+            response = generate(
+                model=model,
+                messages=messages,
+                temperature=temperature,
+                top_p=top_p,
+                n=n,
+                stream=False,
+                stop=stop,
+                max_tokens=max_tokens,
+                presence_penalty=presence_penalty,
+                frequency_penalty=frequency_penalty,
+                logit_bias=logit_bias,
+                user=user,
+                timeout=timeout,
+            )
+            return JSONResponse(content=response)
+    except Exception as e:
+        return JSONResponse(content={"error": str(e)}, status_code=500)
+
+@app.get("/developer_info")
+async def get_developer_info():
+    return JSONResponse(content=developer_info)
+
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=8000)
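A minimal client sketch for the `/chat/completions` endpoint above, assuming the app is running locally on port 8000 (its `__main__` default):

```python
# Non-streaming request against the FastAPI app above.
import requests

payload = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Hello!"}],
    "stream": False,
}
resp = requests.post("http://localhost:8000/chat/completions", json=payload, timeout=60)
resp.raise_for_status()
print(resp.json())
```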
flask_app.py
ADDED
@@ -0,0 +1,131 @@
+from flask import Flask, request, Response, jsonify, stream_with_context
+from flask_cors import CORS
+import json
+
+from typegpt_api import generate, model_mapping, simplified_models
+from api_info import developer_info, model_providers
+
+app = Flask(__name__)
+
+# Set up CORS middleware if needed
+CORS(app, resources={
+    r"/*": {
+        "origins": "*",
+        "supports_credentials": True,  # flask-cors option name for credential support
+        "methods": ["GET", "POST", "OPTIONS"],
+        "allow_headers": "*"
+    }
+})
+
+@app.route("/health_check", methods=['GET'])
+def health_check():
+    return jsonify({"status": "OK"})
+
+@app.route("/models", methods=['GET'])
+def get_models():
+    try:
+        response = {
+            "object": "list",
+            "data": []
+        }
+        for provider, info in model_providers.items():
+            for model in info["models"]:
+                response["data"].append({
+                    "id": model,
+                    "object": "model",
+                    "provider": provider,
+                    "description": info["description"]
+                })
+
+        return jsonify(response)
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+
+@app.route("/chat/completions", methods=['POST'])
+def chat_completions():
+    # Receive the JSON payload
+    try:
+        body = request.get_json()
+    except Exception as e:
+        return jsonify({"error": "Invalid JSON payload"}), 400
+
+    # Extract parameters
+    model = body.get("model")
+    messages = body.get("messages")
+    temperature = body.get("temperature", 0.7)
+    top_p = body.get("top_p", 1.0)
+    n = body.get("n", 1)
+    stream = body.get("stream", False)
+    stop = body.get("stop")
+    max_tokens = body.get("max_tokens")
+    presence_penalty = body.get("presence_penalty", 0.0)
+    frequency_penalty = body.get("frequency_penalty", 0.0)
+    logit_bias = body.get("logit_bias")
+    user = body.get("user")
+    timeout = 30  # or set based on your preference
+
+    # Validate required parameters
+    if not model:
+        return jsonify({"error": "The 'model' parameter is required."}), 400
+    if not messages:
+        return jsonify({"error": "The 'messages' parameter is required."}), 400
+
+    # Call the generate function
+    try:
+        if stream:
+            def generate_stream():
+                response = generate(
+                    model=model,
+                    messages=messages,
+                    temperature=temperature,
+                    top_p=top_p,
+                    n=n,
+                    stream=True,
+                    stop=stop,
+                    max_tokens=max_tokens,
+                    presence_penalty=presence_penalty,
+                    frequency_penalty=frequency_penalty,
+                    logit_bias=logit_bias,
+                    user=user,
+                    timeout=timeout,
+                )
+
+                for chunk in response:
+                    yield f"data: {json.dumps(chunk)}\n\n"
+                yield "data: [DONE]\n\n"
+
+            return Response(
+                stream_with_context(generate_stream()),
+                mimetype="text/event-stream",
+                headers={
+                    "Cache-Control": "no-cache",
+                    "Connection": "keep-alive",
+                    "Transfer-Encoding": "chunked"
+                }
+            )
+        else:
+            response = generate(
+                model=model,
+                messages=messages,
+                temperature=temperature,
+                top_p=top_p,
+                n=n,
+                stream=False,
+                stop=stop,
+                max_tokens=max_tokens,
+                presence_penalty=presence_penalty,
+                frequency_penalty=frequency_penalty,
+                logit_bias=logit_bias,
+                user=user,
+                timeout=timeout,
+            )
+            return jsonify(response)
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+
+@app.route("/developer_info", methods=['GET'])
+def get_developer_info():
+    return jsonify(developer_info)
+
+if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=8000)
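Both apps emit the same `data: {json}` / `data: [DONE]` server-sent-event framing when `stream` is true, so one streaming client sketch covers either (port 8000 assumed, per their `__main__` blocks):

```python
# Streaming client: parses the SSE lines produced by generate_stream() above.
import json
import requests

payload = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Count from 1 to 5."}],
    "stream": True,
}
with requests.post("http://localhost:8000/chat/completions",
                   json=payload, stream=True, timeout=60) as resp:
    resp.raise_for_status()
    for raw in resp.iter_lines():
        if not raw:
            continue
        line = raw.decode("utf-8")
        if not line.startswith("data: "):
            continue
        line = line[6:]  # strip the "data: " prefix
        if line.strip() == "[DONE]":
            break
        chunk = json.loads(line)
        delta = chunk.get("choices", [{}])[0].get("delta", {})
        print(delta.get("content", ""), end="", flush=True)
```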
query.md
ADDED
|
@@ -0,0 +1,384 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
```python
|
| 2 |
+
import requests
|
| 3 |
+
import json
|
| 4 |
+
|
| 5 |
+
# Build model mapping
|
| 6 |
+
original_models = [
|
| 7 |
+
# OpenAI Models
|
| 8 |
+
"gpt-3.5-turbo",
|
| 9 |
+
"gpt-3.5-turbo-202201",
|
| 10 |
+
"gpt-4o",
|
| 11 |
+
"gpt-4o-2024-05-13",
|
| 12 |
+
"o1-preview",
|
| 13 |
+
|
| 14 |
+
# Claude Models
|
| 15 |
+
"claude",
|
| 16 |
+
"claude-3-5-sonnet",
|
| 17 |
+
"claude-sonnet-3.5",
|
| 18 |
+
"claude-3-5-sonnet-20240620",
|
| 19 |
+
|
| 20 |
+
# Meta/LLaMA Models
|
| 21 |
+
"@cf/meta/llama-2-7b-chat-fp16",
|
| 22 |
+
"@cf/meta/llama-2-7b-chat-int8",
|
| 23 |
+
"@cf/meta/llama-3-8b-instruct",
|
| 24 |
+
"@cf/meta/llama-3.1-8b-instruct",
|
| 25 |
+
"@cf/meta-llama/llama-2-7b-chat-hf-lora",
|
| 26 |
+
"llama-3.1-405b",
|
| 27 |
+
"llama-3.1-70b",
|
| 28 |
+
"llama-3.1-8b",
|
| 29 |
+
"meta-llama/Llama-2-7b-chat-hf",
|
| 30 |
+
"meta-llama/Llama-3.1-70B-Instruct",
|
| 31 |
+
"meta-llama/Llama-3.1-8B-Instruct",
|
| 32 |
+
"meta-llama/Llama-3.2-11B-Vision-Instruct",
|
| 33 |
+
"meta-llama/Llama-3.2-1B-Instruct",
|
| 34 |
+
"meta-llama/Llama-3.2-3B-Instruct",
|
| 35 |
+
"meta-llama/Llama-3.2-90B-Vision-Instruct",
|
| 36 |
+
"meta-llama/Llama-Guard-3-8B",
|
| 37 |
+
"meta-llama/Meta-Llama-3-70B-Instruct",
|
| 38 |
+
"meta-llama/Meta-Llama-3-8B-Instruct",
|
| 39 |
+
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
|
| 40 |
+
"meta-llama/Meta-Llama-3.1-8B-Instruct",
|
| 41 |
+
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
|
| 42 |
+
|
| 43 |
+
# Mistral Models
|
| 44 |
+
"mistral",
|
| 45 |
+
"mistral-large",
|
| 46 |
+
"@cf/mistral/mistral-7b-instruct-v0.1",
|
| 47 |
+
"@cf/mistral/mistral-7b-instruct-v0.2-lora",
|
| 48 |
+
"@hf/mistralai/mistral-7b-instruct-v0.2",
|
| 49 |
+
"mistralai/Mistral-7B-Instruct-v0.2",
|
| 50 |
+
"mistralai/Mistral-7B-Instruct-v0.3",
|
| 51 |
+
"mistralai/Mixtral-8x22B-Instruct-v0.1",
|
| 52 |
+
"mistralai/Mixtral-8x7B-Instruct-v0.1",
|
| 53 |
+
|
| 54 |
+
# Qwen Models
|
| 55 |
+
"@cf/qwen/qwen1.5-0.5b-chat",
|
| 56 |
+
"@cf/qwen/qwen1.5-1.8b-chat",
|
| 57 |
+
"@cf/qwen/qwen1.5-7b-chat-awq",
|
| 58 |
+
"@cf/qwen/qwen1.5-14b-chat-awq",
|
| 59 |
+
"Qwen/Qwen2.5-3B-Instruct",
|
| 60 |
+
"Qwen/Qwen2.5-72B-Instruct",
|
| 61 |
+
"Qwen/Qwen2.5-Coder-32B-Instruct",
|
| 62 |
+
|
| 63 |
+
# Google/Gemini Models
|
| 64 |
+
"@cf/google/gemma-2b-it-lora",
|
| 65 |
+
"@cf/google/gemma-7b-it-lora",
|
| 66 |
+
"@hf/google/gemma-7b-it",
|
| 67 |
+
"google/gemma-1.1-2b-it",
|
| 68 |
+
"google/gemma-1.1-7b-it",
|
| 69 |
+
"gemini-pro",
|
| 70 |
+
"gemini-1.5-pro",
|
| 71 |
+
"gemini-1.5-pro-latest",
|
| 72 |
+
"gemini-1.5-flash",
|
| 73 |
+
|
| 74 |
+
# Cohere Models
|
| 75 |
+
"c4ai-aya-23-35b",
|
| 76 |
+
"c4ai-aya-23-8b",
|
| 77 |
+
"command",
|
| 78 |
+
"command-light",
|
| 79 |
+
"command-light-nightly",
|
| 80 |
+
"command-nightly",
|
| 81 |
+
"command-r",
|
| 82 |
+
"command-r-08-2024",
|
| 83 |
+
"command-r-plus",
|
| 84 |
+
"command-r-plus-08-2024",
|
| 85 |
+
"rerank-english-v2.0",
|
| 86 |
+
"rerank-english-v3.0",
|
| 87 |
+
"rerank-multilingual-v2.0",
|
| 88 |
+
"rerank-multilingual-v3.0",
|
| 89 |
+
|
| 90 |
+
# Microsoft Models
|
| 91 |
+
"@cf/microsoft/phi-2",
|
| 92 |
+
"microsoft/DialoGPT-medium",
|
| 93 |
+
"microsoft/Phi-3-medium-4k-instruct",
|
| 94 |
+
"microsoft/Phi-3-mini-4k-instruct",
|
| 95 |
+
"microsoft/Phi-3.5-mini-instruct",
|
| 96 |
+
"microsoft/WizardLM-2-8x22B",
|
| 97 |
+
|
| 98 |
+
# Yi Models
|
| 99 |
+
"01-ai/Yi-1.5-34B-Chat",
|
| 100 |
+
"01-ai/Yi-34B-Chat",
|
| 101 |
+
]
|
| 102 |
+
|
| 103 |
+
# Create mapping from simplified model names to original model names
|
| 104 |
+
model_mapping = {}
|
| 105 |
+
simplified_models = []
|
| 106 |
+
|
| 107 |
+
for original_model in original_models:
|
| 108 |
+
simplified_name = original_model.split('/')[-1]
|
| 109 |
+
if simplified_name in model_mapping:
|
| 110 |
+
# Conflict detected, handle as per instructions
|
| 111 |
+
print(f"Conflict detected for model name '{simplified_name}'. Excluding '{original_model}' from available models.")
|
| 112 |
+
continue
|
| 113 |
+
model_mapping[simplified_name] = original_model
|
| 114 |
+
simplified_models.append(simplified_name)
|
| 115 |
+
|
| 116 |
+
def generate(
|
| 117 |
+
model,
|
| 118 |
+
messages,
|
| 119 |
+
temperature=0.7,
|
| 120 |
+
top_p=1.0,
|
| 121 |
+
n=1,
|
| 122 |
+
stream=False,
|
| 123 |
+
stop=None,
|
| 124 |
+
max_tokens=None,
|
| 125 |
+
presence_penalty=0.0,
|
| 126 |
+
frequency_penalty=0.0,
|
| 127 |
+
logit_bias=None,
|
| 128 |
+
user=None,
|
| 129 |
+
timeout=30,
|
| 130 |
+
):
|
| 131 |
+
"""
|
| 132 |
+
Generates a chat completion using the provided model and messages.
|
| 133 |
+
"""
|
| 134 |
+
# Use the simplified model names
|
| 135 |
+
models = simplified_models
|
| 136 |
+
|
| 137 |
+
if model not in models:
|
| 138 |
+
raise ValueError(f"Invalid model: {model}. Choose from: {', '.join(models)}")
|
| 139 |
+
|
| 140 |
+
# Map simplified model name to original model name
|
| 141 |
+
original_model = model_mapping[model]
|
| 142 |
+
|
| 143 |
+
api_endpoint = "https://chat.typegpt.net/api/openai/v1/chat/completions"
|
| 144 |
+
|
| 145 |
+
headers = {
|
| 146 |
+
"authority": "chat.typegpt.net",
|
| 147 |
+
"accept": "application/json, text/event-stream",
|
| 148 |
+
"accept-language": "en-US,en;q=0.9",
|
| 149 |
+
"content-type": "application/json",
|
| 150 |
+
"origin": "https://chat.typegpt.net",
|
| 151 |
+
"referer": "https://chat.typegpt.net/",
|
| 152 |
+
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
+
# Payload
|
| 156 |
+
payload = {
|
| 157 |
+
"messages": messages,
|
| 158 |
+
"stream": stream,
|
| 159 |
+
"model": original_model,
|
| 160 |
+
"temperature": temperature,
|
| 161 |
+
"presence_penalty": presence_penalty,
|
| 162 |
+
"frequency_penalty": frequency_penalty,
|
| 163 |
+
"top_p": top_p,
|
| 164 |
+
}
|
| 165 |
+
|
| 166 |
+
# Only include max_tokens if it's not None
|
| 167 |
+
if max_tokens is not None:
|
| 168 |
+
payload["max_tokens"] = max_tokens
|
| 169 |
+
|
| 170 |
+
# Only include 'stop' if it's not None
|
| 171 |
+
if stop is not None:
|
| 172 |
+
payload["stop"] = stop
|
| 173 |
+
|
| 174 |
+
# Check if logit_bias is provided
|
| 175 |
+
if logit_bias is not None:
|
| 176 |
+
payload["logit_bias"] = logit_bias
|
| 177 |
+
|
| 178 |
+
# Include 'user' if provided
|
| 179 |
+
if user is not None:
|
| 180 |
+
payload["user"] = user
|
| 181 |
+
|
| 182 |
+
# Start the request
|
| 183 |
+
session = requests.Session()
|
| 184 |
+
response = session.post(
|
| 185 |
+
api_endpoint, headers=headers, json=payload, stream=stream, timeout=timeout
|
| 186 |
+
)
|
| 187 |
+
|
| 188 |
+
if not response.ok:
|
| 189 |
+
raise Exception(f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}")
|
| 190 |
+
|
| 191 |
+
def stream_response():
|
| 192 |
+
for line in response.iter_lines():
|
| 193 |
+
if line:
|
| 194 |
+
line = line.decode("utf-8")
|
| 195 |
+
if line.startswith("data: "):
|
| 196 |
+
line = line[6:] # Remove "data: " prefix
|
| 197 |
+
if line.strip() == "[DONE]":
|
| 198 |
+
break
|
| 199 |
+
try:
|
| 200 |
+
data = json.loads(line)
|
| 201 |
+
yield data
|
| 202 |
+
except json.JSONDecodeError:
|
| 203 |
+
continue
|
| 204 |
+
|
| 205 |
+
if stream:
|
| 206 |
+
return stream_response()
|
| 207 |
+
else:
|
| 208 |
+
return response.json()
|
| 209 |
+
|
| 210 |
+
if __name__ == "__main__":
|
| 211 |
+
# Example usage
|
| 212 |
+
# model = "claude-3-5-sonnet-20240620"
|
| 213 |
+
# model = "qwen1.5-0.5b-chat"
|
| 214 |
+
# model = "llama-2-7b-chat-fp16"
|
| 215 |
+
model = "gpt-3.5-turbo"
|
| 216 |
+
messages = [
|
| 217 |
+
{"role": "system", "content": "Be Detailed"},
|
| 218 |
+
{"role": "user", "content": "What is the knowledge cut off? Be specific and also specify the month, year and date. If not sure, then provide approximate."}
|
| 219 |
+
]
|
| 220 |
+
|
| 221 |
+
# try:
|
| 222 |
+
# # For non-streamed response
|
| 223 |
+
# response = generate(
|
| 224 |
+
# model=model,
|
| 225 |
+
# messages=messages,
|
| 226 |
+
# temperature=0.5,
|
| 227 |
+
# max_tokens=4000,
|
| 228 |
+
# stream=False # Change to True for streaming
|
| 229 |
+
# )
|
| 230 |
+
# if 'choices' in response:
|
| 231 |
+
# reply = response['choices'][0]['message']['content']
|
| 232 |
+
# print(reply)
|
| 233 |
+
# else:
|
| 234 |
+
# print("No response received.")
|
| 235 |
+
# except Exception as e:
|
| 236 |
+
# print(e)
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
try:
|
| 240 |
+
# For streamed response
|
| 241 |
+
response = generate(
|
| 242 |
+
model=model,
|
| 243 |
+
messages=messages,
|
| 244 |
+
temperature=0.5,
|
| 245 |
+
max_tokens=4000,
|
| 246 |
+
stream=True, # Change to False for non-streamed response
|
| 247 |
+
)
|
| 248 |
+
for data in response:
|
| 249 |
+
if 'choices' in data:
|
| 250 |
+
reply = data['choices'][0]['delta']['content']
|
| 251 |
+
print(reply, end="", flush=True)
|
| 252 |
+
else:
|
| 253 |
+
print("No response received.")
|
| 254 |
+
except Exception as e:
|
| 255 |
+
print(e)
|
| 256 |
+
```
|
| 257 |
+
|
| 258 |
+
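
For quick reference, a non-streamed call looks like the sketch below. It assumes the module above is importable as `typegpt_api`; the model name and prompt are illustrative only.

```python
# Minimal sketch: non-streamed completion via the generate() function above.
# Assumes the module is saved as typegpt_api.py; model and prompt are illustrative.
from typegpt_api import generate

response = generate(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    stream=False,
)
print(response["choices"][0]["message"]["content"])
```
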
```python
from fastapi import FastAPI, Request, Response
from fastapi.responses import JSONResponse, StreamingResponse
from fastapi.middleware.cors import CORSMiddleware
import uvicorn
import asyncio
import json
import requests

from TYPEGPT.typegpt_api import generate, model_mapping, simplified_models
from api_info import developer_info

app = FastAPI()

# Set up CORS middleware if needed
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

@app.get("/health_check")
async def health_check():
    return {"status": "OK"}

@app.get("/models")
async def get_models():
    # Retrieve models from TypeGPT API and forward the response
    api_endpoint = "https://chat.typegpt.net/api/openai/v1/models"
    try:
        response = requests.get(api_endpoint)
        # return response.text
        return JSONResponse(content=response.json(), status_code=response.status_code)
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)

@app.post("/chat/completions")
async def chat_completions(request: Request):
    # Receive the JSON payload
    try:
        body = await request.json()
    except Exception as e:
        return JSONResponse(content={"error": "Invalid JSON payload"}, status_code=400)

    # Extract parameters
    model = body.get("model")
    messages = body.get("messages")
    temperature = body.get("temperature", 0.7)
    top_p = body.get("top_p", 1.0)
    n = body.get("n", 1)
    stream = body.get("stream", False)
    stop = body.get("stop")
    max_tokens = body.get("max_tokens")
    presence_penalty = body.get("presence_penalty", 0.0)
    frequency_penalty = body.get("frequency_penalty", 0.0)
    logit_bias = body.get("logit_bias")
    user = body.get("user")
    timeout = 30  # or set based on your preference

    # Validate required parameters
    if not model:
        return JSONResponse(content={"error": "The 'model' parameter is required."}, status_code=400)
    if not messages:
        return JSONResponse(content={"error": "The 'messages' parameter is required."}, status_code=400)

    # Call the generate function
    try:
        if stream:
            async def generate_stream():
                response = generate(
                    model=model,
                    messages=messages,
                    temperature=temperature,
                    top_p=top_p,
                    n=n,
                    stream=True,
                    stop=stop,
                    max_tokens=max_tokens,
                    presence_penalty=presence_penalty,
                    frequency_penalty=frequency_penalty,
                    logit_bias=logit_bias,
                    user=user,
                    timeout=timeout,
                )

                for chunk in response:
                    yield f"data: {json.dumps(chunk)}\n\n"
                yield "data: [DONE]\n\n"

            return StreamingResponse(
                generate_stream(),
                media_type="text/event-stream",
                headers={
                    "Cache-Control": "no-cache",
                    "Connection": "keep-alive",
                    "Transfer-Encoding": "chunked"
                }
            )
        else:
            response = generate(
                model=model,
                messages=messages,
                temperature=temperature,
                top_p=top_p,
                n=n,
                stream=False,
                stop=stop,
                max_tokens=max_tokens,
                presence_penalty=presence_penalty,
                frequency_penalty=frequency_penalty,
                logit_bias=logit_bias,
                user=user,
                timeout=timeout,
            )
            return JSONResponse(content=response)
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)

@app.get("/developer_info")
async def get_developer_info():
    return JSONResponse(content=developer_info)

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
```
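
With the app running (for example via the `__main__` block above, on port 8000), a client can consume the streaming endpoint as in this sketch; the base URL and prompt are assumptions, and `str.removeprefix` needs Python 3.9+.

```python
# Illustrative client for the FastAPI app above; assumes it runs on localhost:8000.
import json
import requests

base = "http://localhost:8000"
payload = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "One-line fun fact, please."}],
    "stream": True,
}

with requests.post(f"{base}/chat/completions", json=payload, stream=True) as r:
    for line in r.iter_lines():
        if not line:
            continue
        chunk = line.decode("utf-8").removeprefix("data: ")  # strip SSE prefix
        if chunk.strip() == "[DONE]":
            break
        data = json.loads(chunk)
        print(data["choices"][0]["delta"].get("content", ""), end="", flush=True)
```
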
requirements.txt
ADDED
@@ -0,0 +1,7 @@
fastapi==0.110.2
Flask==3.0.3
Requests==2.31.0
uvicorn==0.29.0
python-dotenv==1.0.1
colorama
pytz
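
After `pip install -r requirements.txt`, a small stdlib-only sketch can confirm the pins resolved (package names here mirror the file above; `importlib.metadata` needs Python 3.8+):

```python
# Sketch: verify the packages pinned in requirements.txt are installed.
from importlib.metadata import version, PackageNotFoundError

for pkg in ["fastapi", "flask", "requests", "uvicorn", "python-dotenv", "colorama", "pytz"]:
    try:
        print(f"{pkg}=={version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg} is not installed")
```
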
typegpt_api.py
ADDED
@@ -0,0 +1,254 @@
import requests
import json

# Build model mapping
original_models = [
    # OpenAI Models
    "gpt-3.5-turbo",
    "gpt-3.5-turbo-202201",
    "gpt-4o",
    "gpt-4o-2024-05-13",
    "o1-preview",

    # Claude Models
    "claude",
    "claude-3-5-sonnet",
    "claude-sonnet-3.5",
    "claude-3-5-sonnet-20240620",

    # Meta/LLaMA Models
    "@cf/meta/llama-2-7b-chat-fp16",
    "@cf/meta/llama-2-7b-chat-int8",
    "@cf/meta/llama-3-8b-instruct",
    "@cf/meta/llama-3.1-8b-instruct",
    "@cf/meta-llama/llama-2-7b-chat-hf-lora",
    "llama-3.1-405b",
    "llama-3.1-70b",
    "llama-3.1-8b",
    "meta-llama/Llama-2-7b-chat-hf",
    "meta-llama/Llama-3.1-70B-Instruct",
    "meta-llama/Llama-3.1-8B-Instruct",
    "meta-llama/Llama-3.2-11B-Vision-Instruct",
    "meta-llama/Llama-3.2-1B-Instruct",
    "meta-llama/Llama-3.2-3B-Instruct",
    "meta-llama/Llama-3.2-90B-Vision-Instruct",
    "meta-llama/Llama-Guard-3-8B",
    "meta-llama/Meta-Llama-3-70B-Instruct",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
    "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",

    # Mistral Models
    "mistral",
    "mistral-large",
    "@cf/mistral/mistral-7b-instruct-v0.1",
    "@cf/mistral/mistral-7b-instruct-v0.2-lora",
    "@hf/mistralai/mistral-7b-instruct-v0.2",
    "mistralai/Mistral-7B-Instruct-v0.2",
    "mistralai/Mistral-7B-Instruct-v0.3",
    "mistralai/Mixtral-8x22B-Instruct-v0.1",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",

    # Qwen Models
    "@cf/qwen/qwen1.5-0.5b-chat",
    "@cf/qwen/qwen1.5-1.8b-chat",
    "@cf/qwen/qwen1.5-7b-chat-awq",
    "@cf/qwen/qwen1.5-14b-chat-awq",
    "Qwen/Qwen2.5-3B-Instruct",
    "Qwen/Qwen2.5-72B-Instruct",
    "Qwen/Qwen2.5-Coder-32B-Instruct",

    # Google/Gemini Models
    "@cf/google/gemma-2b-it-lora",
    "@cf/google/gemma-7b-it-lora",
    "@hf/google/gemma-7b-it",
    "google/gemma-1.1-2b-it",
    "google/gemma-1.1-7b-it",
    "gemini-pro",
    "gemini-1.5-pro",
    "gemini-1.5-pro-latest",
    "gemini-1.5-flash",

    # Cohere Models
    "c4ai-aya-23-35b",
    "c4ai-aya-23-8b",
    "command",
    "command-light",
    "command-light-nightly",
    "command-nightly",
    "command-r",
    "command-r-08-2024",
    "command-r-plus",
    "command-r-plus-08-2024",
    "rerank-english-v2.0",
    "rerank-english-v3.0",
    "rerank-multilingual-v2.0",
    "rerank-multilingual-v3.0",

    # Microsoft Models
    "@cf/microsoft/phi-2",
    "microsoft/DialoGPT-medium",
    "microsoft/Phi-3-medium-4k-instruct",
    "microsoft/Phi-3-mini-4k-instruct",
    "microsoft/Phi-3.5-mini-instruct",
    "microsoft/WizardLM-2-8x22B",

    # Yi Models
    "01-ai/Yi-1.5-34B-Chat",
    "01-ai/Yi-34B-Chat",
]

# Create mapping from simplified model names to original model names
model_mapping = {}
simplified_models = []

for original_model in original_models:
    simplified_name = original_model.split('/')[-1]
    if simplified_name in model_mapping:
        # Conflict detected: keep the first entry and exclude the duplicate
        print(f"Conflict detected for model name '{simplified_name}'. Excluding '{original_model}' from available models.")
        continue
    model_mapping[simplified_name] = original_model
    simplified_models.append(simplified_name)

def generate(
    model,
    messages,
    temperature=0.7,
    top_p=1.0,
    n=1,
    stream=False,
    stop=None,
    max_tokens=None,
    presence_penalty=0.0,
    frequency_penalty=0.0,
    logit_bias=None,
    user=None,
    timeout=30,
):
    """
    Generates a chat completion using the provided model and messages.
    """
    # Use the simplified model names
    models = simplified_models

    if model not in models:
        raise ValueError(f"Invalid model: {model}. Choose from: {', '.join(models)}")

    # Map simplified model name to original model name
    original_model = model_mapping[model]

    api_endpoint = "https://chat.typegpt.net/api/openai/v1/chat/completions"

    headers = {
        "authority": "chat.typegpt.net",
        "accept": "application/json, text/event-stream",
        "accept-language": "en-US,en;q=0.9",
        "content-type": "application/json",
        "origin": "https://chat.typegpt.net",
        "referer": "https://chat.typegpt.net/",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
    }

    # Payload
    payload = {
        "messages": messages,
        "stream": stream,
        "model": original_model,
        "temperature": temperature,
        "presence_penalty": presence_penalty,
        "frequency_penalty": frequency_penalty,
        "top_p": top_p,
    }

    # Only include max_tokens if it's not None
    if max_tokens is not None:
        payload["max_tokens"] = max_tokens

    # Only include 'stop' if it's not None
    if stop is not None:
        payload["stop"] = stop

    # Only include 'logit_bias' if it's provided
    if logit_bias is not None:
        payload["logit_bias"] = logit_bias

    # Include 'user' if provided
    if user is not None:
        payload["user"] = user

    # Start the request
    session = requests.Session()
    response = session.post(
        api_endpoint, headers=headers, json=payload, stream=stream, timeout=timeout
    )

    if not response.ok:
        raise Exception(f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}")

    def stream_response():
        for line in response.iter_lines():
            if line:
                line = line.decode("utf-8")
                if line.startswith("data: "):
                    line = line[6:]  # Remove "data: " prefix
                    if line.strip() == "[DONE]":
                        break
                    try:
                        data = json.loads(line)
                        yield data
                    except json.JSONDecodeError:
                        continue

    if stream:
        return stream_response()
    else:
        return response.json()

if __name__ == "__main__":
    # Example usage
    # model = "claude-3-5-sonnet-20240620"
    # model = "qwen1.5-0.5b-chat"
    # model = "llama-2-7b-chat-fp16"
    model = "gpt-3.5-turbo"
    messages = [
        {"role": "system", "content": "Be Detailed"},
        {"role": "user", "content": "What is the knowledge cut off? Be specific and also specify the month, year and date. If not sure, then provide approximate."}
    ]

    # try:
    #     # For non-streamed response
    #     response = generate(
    #         model=model,
    #         messages=messages,
    #         temperature=0.5,
    #         max_tokens=4000,
    #         stream=False  # Change to True for streaming
    #     )
    #     if 'choices' in response:
    #         reply = response['choices'][0]['message']['content']
    #         print(reply)
    #     else:
    #         print("No response received.")
    # except Exception as e:
    #     print(e)

    try:
        # For streamed response
        response = generate(
            model=model,
            messages=messages,
            temperature=0.5,
            max_tokens=4000,
            stream=True,  # Change to False for non-streamed response
        )
        for data in response:
            if 'choices' in data:
                # .get() avoids a KeyError on chunks whose delta carries no 'content'
                reply = data['choices'][0]['delta'].get('content', '')
                print(reply, end="", flush=True)
            else:
                print("No response received.")
    except Exception as e:
        print(e)
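
The simplification loop in typegpt_api.py keeps only the segment after the last `/` and drops later duplicates. A toy illustration of that rule (the `vendor-a`/`vendor-b` names are made up):

```python
# Toy illustration of the name-simplification and conflict rule used above.
names = ["@cf/meta/llama-3-8b-instruct", "vendor-a/demo", "vendor-b/demo"]
mapping = {}
for n in names:
    short = n.split('/')[-1]
    if short in mapping:
        print(f"skipping duplicate short name: {n}")
        continue
    mapping[short] = n
print(mapping)
# 'demo' maps to 'vendor-a/demo'; 'vendor-b/demo' is excluded.
```
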
usage_inference.py
ADDED
@@ -0,0 +1,158 @@
import requests
import json
import time
from typing import Dict, Any

class APITester:
    def __init__(self, base_url: str = "http://localhost:8000"):
        self.base_url = base_url
        self.session = requests.Session()

    def test_health_check(self) -> None:
        """Test the health check endpoint."""
        print("\n=== Testing Health Check Endpoint ===")
        try:
            response = self.session.get(f"{self.base_url}/health_check")
            print(f"Status Code: {response.status_code}")
            print(f"Response: {response.json()}")
            assert response.status_code == 200
            print("✅ Health check test passed!")
        except Exception as e:
            print(f"❌ Health check test failed: {str(e)}")

    def test_models(self) -> None:
        """Test the models endpoint."""
        print("\n=== Testing Models Endpoint ===")
        try:
            response = self.session.get(f"{self.base_url}/models")
            print(f"Status Code: {response.status_code}")
            data = response.json()
            print(f"Number of models available: {len(data['data'])}")
            print("Sample models:")
            for model in data['data'][:5]:  # Show first 5 models
                print(f"- {model['id']}")
            assert response.status_code == 200
            print("✅ Models endpoint test passed!")
        except Exception as e:
            print(f"❌ Models endpoint test failed: {str(e)}")

    def test_chat_completions_non_streaming(self) -> None:
        """Test the chat completions endpoint without streaming."""
        print("\n=== Testing Chat Completions Endpoint (Non-Streaming) ===")
        payload = {
            "model": "gpt-3.5-turbo",
            "messages": [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "Tell me a short joke about programming."}
            ],
            "temperature": 0.7,
            "max_tokens": 150,
            "stream": False
        }

        try:
            response = self.session.post(
                f"{self.base_url}/chat/completions",
                json=payload
            )
            print(f"Status Code: {response.status_code}")
            if response.status_code == 200:
                data = response.json()
                print("Response content:")
                print(data['choices'][0]['message']['content'])
            assert response.status_code == 200
            print("✅ Chat completions (non-streaming) test passed!")
        except Exception as e:
            print(f"❌ Chat completions (non-streaming) test failed: {str(e)}")

    def test_chat_completions_streaming(self) -> None:
        """Test the chat completions endpoint with streaming."""
        print("\n=== Testing Chat Completions Endpoint (Streaming) ===")
        payload = {
            "model": "gpt-3.5-turbo",
            "messages": [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "Write 5 lines about India"}
            ],
            "temperature": 0.7,
            "max_tokens": 150,
            "stream": True
        }

        try:
            with self.session.post(
                f"{self.base_url}/chat/completions",
                json=payload,
                stream=True,
                headers={"Accept": "text/event-stream"}
            ) as response:
                print(f"Status Code: {response.status_code}")
                print("Streaming response:")

                buffer = ""
                for chunk in response.iter_lines():
                    if chunk:
                        chunk = chunk.decode('utf-8')
                        if chunk.startswith('data: '):
                            chunk = chunk[6:]  # Remove 'data: ' prefix
                            if chunk.strip() == '[DONE]':
                                break
                            try:
                                data = json.loads(chunk)
                                if 'choices' in data and len(data['choices']) > 0:
                                    if 'delta' in data['choices'][0] and 'content' in data['choices'][0]['delta']:
                                        content = data['choices'][0]['delta']['content']
                                        print(content, end='', flush=True)
                                        time.sleep(0.1)  # Add a small delay to simulate real-time streaming
                            except json.JSONDecodeError:
                                continue

            print("\n✅ Chat completions (streaming) test passed!")
        except Exception as e:
            print(f"❌ Chat completions (streaming) test failed: {str(e)}")

    def test_developer_info(self) -> None:
        """Test the developer info endpoint."""
        print("\n=== Testing Developer Info Endpoint ===")
        try:
            response = self.session.get(f"{self.base_url}/developer_info")
            print(f"Status Code: {response.status_code}")
            print("Developer Info:")
            print(json.dumps(response.json(), indent=2))
            assert response.status_code == 200
            print("✅ Developer info test passed!")
        except Exception as e:
            print(f"❌ Developer info test failed: {str(e)}")

    def run_all_tests(self) -> None:
        """Run all tests sequentially."""
        tests = [
            self.test_health_check,
            self.test_models,
            self.test_chat_completions_non_streaming,
            self.test_chat_completions_streaming,
            self.test_developer_info
        ]

        print("🚀 Starting API Tests...")
        start_time = time.time()

        for test in tests:
            test()

        end_time = time.time()
        duration = end_time - start_time

        print(f"\n============================")
        print(f"🏁 All tests completed in {duration:.2f} seconds")
        print(f"============================")

def main():
    # Initialize tester with your API's base URL
    tester = APITester("http://localhost:8000")

    # Run all tests
    tester.run_all_tests()

if __name__ == "__main__":
    main()
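
To use this tester, start the API first (for example the FastAPI app on port 8000) and then run this file. Individual checks can also be invoked on their own, roughly as in this sketch; the base URL is an assumption to adjust for your deployment.

```python
# Sketch: run a single check against an already-running instance of the API.
from usage_inference import APITester

tester = APITester("http://localhost:8000")  # adjust host/port as needed
tester.test_health_check()
```
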