Upload 10 files
- Dockerfile +20 -0
- TYPEGPT/typegpt.py +409 -0
- TYPEGPT/typegpt_normal.py +231 -0
- api_info.py +176 -0
- fastapi_app.py +132 -0
- flask_app.py +131 -0
- query.md +384 -0
- requirements.txt +7 -0
- typegpt_api.py +254 -0
- usage_inference.py +158 -0
Dockerfile
ADDED
@@ -0,0 +1,20 @@
+# Use an official Python runtime as the base image
+FROM python:3.9-slim
+
+# Set the working directory in the container
+WORKDIR /app
+
+# Copy the requirements file into the container
+COPY requirements.txt .
+
+# Install the required packages
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the rest of the application code into the container
+COPY . .
+
+# Expose the port that FastAPI will run on
+EXPOSE 7860
+
+# Command to run the FastAPI application (the `app` object is defined in fastapi_app.py; no main.py is included in this commit)
+CMD ["uvicorn", "fastapi_app:app", "--host", "0.0.0.0", "--port", "7860"]
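For a local smoke test of this image (assuming a standard Docker CLI; the image tag is a placeholder): `docker build -t typegpt-api .` followed by `docker run -p 7860:7860 typegpt-api`. Port 7860 matches the EXPOSE line and the default app port for Docker-based Hugging Face Spaces.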
TYPEGPT/typegpt.py
ADDED
@@ -0,0 +1,409 @@
+import requests
+import json
+from typing import *
+
+from webscout.AIutel import Optimizers
+from webscout.AIutel import Conversation
+from webscout.AIutel import AwesomePrompts
+from webscout.AIbase import Provider
+from webscout import exceptions
+
+class TypeGPT(Provider):
+    """
+    A class to interact with the TypeGPT.net API. Improved to match webscout standards.
+    """
+    url = "https://chat.typegpt.net"
+    working = True
+    supports_message_history = True
+
+    models = [
+        # OpenAI Models
+        "gpt-3.5-turbo",
+        "gpt-3.5-turbo-202201",
+        "gpt-4o",
+        "gpt-4o-2024-05-13",
+        "o1-preview",
+
+        # Claude Models
+        "claude",
+        "claude-3-5-sonnet",
+        "claude-sonnet-3.5",
+        "claude-3-5-sonnet-20240620",
+
+        # Meta/LLaMA Models
+        "@cf/meta/llama-2-7b-chat-fp16",
+        "@cf/meta/llama-2-7b-chat-int8",
+        "@cf/meta/llama-3-8b-instruct",
+        "@cf/meta/llama-3.1-8b-instruct",
+        "@cf/meta-llama/llama-2-7b-chat-hf-lora",
+        "llama-3.1-405b",
+        "llama-3.1-70b",
+        "llama-3.1-8b",
+        "meta-llama/Llama-2-7b-chat-hf",
+        "meta-llama/Llama-3.1-70B-Instruct",
+        "meta-llama/Llama-3.1-8B-Instruct",
+        "meta-llama/Llama-3.2-11B-Vision-Instruct",
+        "meta-llama/Llama-3.2-1B-Instruct",
+        "meta-llama/Llama-3.2-3B-Instruct",
+        "meta-llama/Llama-3.2-90B-Vision-Instruct",
+        "meta-llama/Llama-Guard-3-8B",
+        "meta-llama/Meta-Llama-3-70B-Instruct",
+        "meta-llama/Meta-Llama-3-8B-Instruct",
+        "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
+        "meta-llama/Meta-Llama-3.1-8B-Instruct",
+        "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+
+        # Mistral Models
+        "mistral",
+        "mistral-large",
+        "@cf/mistral/mistral-7b-instruct-v0.1",
+        "@cf/mistral/mistral-7b-instruct-v0.2-lora",
+        "@hf/mistralai/mistral-7b-instruct-v0.2",
+        "mistralai/Mistral-7B-Instruct-v0.2",
+        "mistralai/Mistral-7B-Instruct-v0.3",
+        "mistralai/Mixtral-8x22B-Instruct-v0.1",
+        "mistralai/Mixtral-8x7B-Instruct-v0.1",
+
+        # Qwen Models
+        "@cf/qwen/qwen1.5-0.5b-chat",
+        "@cf/qwen/qwen1.5-1.8b-chat",
+        "@cf/qwen/qwen1.5-7b-chat-awq",
+        "@cf/qwen/qwen1.5-14b-chat-awq",
+        "Qwen/Qwen2.5-3B-Instruct",
+        "Qwen/Qwen2.5-72B-Instruct",
+        "Qwen/Qwen2.5-Coder-32B-Instruct",
+
+        # Google/Gemini Models
+        "@cf/google/gemma-2b-it-lora",
+        "@cf/google/gemma-7b-it-lora",
+        "@hf/google/gemma-7b-it",
+        "google/gemma-1.1-2b-it",
+        "google/gemma-1.1-7b-it",
+        "gemini-pro",
+        "gemini-1.5-pro",
+        "gemini-1.5-pro-latest",
+        "gemini-1.5-flash",
+
+        # Cohere Models
+        "c4ai-aya-23-35b",
+        "c4ai-aya-23-8b",
+        "command",
+        "command-light",
+        "command-light-nightly",
+        "command-nightly",
+        "command-r",
+        "command-r-08-2024",
+        "command-r-plus",
+        "command-r-plus-08-2024",
+        "rerank-english-v2.0",
+        "rerank-english-v3.0",
+        "rerank-multilingual-v2.0",
+        "rerank-multilingual-v3.0",
+
+        # Microsoft Models
+        "@cf/microsoft/phi-2",
+        "microsoft/DialoGPT-medium",
+        "microsoft/Phi-3-medium-4k-instruct",
+        "microsoft/Phi-3-mini-4k-instruct",
+        "microsoft/Phi-3.5-mini-instruct",
+        "microsoft/WizardLM-2-8x22B",
+
+        # Yi Models
+        "01-ai/Yi-1.5-34B-Chat",
+        "01-ai/Yi-34B-Chat",
+
+        # Specialized Models and Tools
+        "@cf/deepseek-ai/deepseek-math-7b-base",
+        "@cf/deepseek-ai/deepseek-math-7b-instruct",
+        "@cf/defog/sqlcoder-7b-2",
+        "@cf/openchat/openchat-3.5-0106",
+        "@cf/thebloke/discolm-german-7b-v1-awq",
+        "@cf/tiiuae/falcon-7b-instruct",
+        "@cf/tinyllama/tinyllama-1.1b-chat-v1.0",
+        "@hf/nexusflow/starling-lm-7b-beta",
+        "@hf/nousresearch/hermes-2-pro-mistral-7b",
+        "@hf/thebloke/deepseek-coder-6.7b-base-awq",
+        "@hf/thebloke/deepseek-coder-6.7b-instruct-awq",
+        "@hf/thebloke/llama-2-13b-chat-awq",
+        "@hf/thebloke/llamaguard-7b-awq",
+        "@hf/thebloke/neural-chat-7b-v3-1-awq",
+        "@hf/thebloke/openhermes-2.5-mistral-7b-awq",
+        "@hf/thebloke/zephyr-7b-beta-awq",
+        "AndroidDeveloper",
+        "AngularJSAgent",
+        "AzureAgent",
+        "BitbucketAgent",
+        "DigitalOceanAgent",
+        "DockerAgent",
+        "ElectronAgent",
+        "ErlangAgent",
+        "FastAPIAgent",
+        "FirebaseAgent",
+        "FlaskAgent",
+        "FlutterAgent",
+        "GitAgent",
+        "GitlabAgent",
+        "GoAgent",
+        "GodotAgent",
+        "GoogleCloudAgent",
+        "HTMLAgent",
+        "HerokuAgent",
+        "ImageGeneration",
+        "JavaAgent",
+        "JavaScriptAgent",
+        "MongoDBAgent",
+        "Next.jsAgent",
+        "PyTorchAgent",
+        "PythonAgent",
+        "ReactAgent",
+        "RepoMap",
+        "SwiftDeveloper",
+        "XcodeAgent",
+        "YoutubeAgent",
+        "blackboxai",
+        "blackboxai-pro",
+        "builderAgent",
+        "dify",
+        "flux",
+        "openchat/openchat-3.6-8b",
+        "rtist",
+        "searchgpt",
+        "sur",
+        "sur-mistral",
+        "unity"
+    ]
+
+    def __init__(
+        self,
+        is_conversation: bool = True,
+        max_tokens: int = 4000,  # Set a reasonable default
+        timeout: int = 30,
+        intro: str = None,
+        filepath: str = None,
+        update_file: bool = True,
+        proxies: dict = {},
+        history_offset: int = 10250,
+        act: str = None,
+        model: str = "claude-3-5-sonnet-20240620",
+        system_prompt: str = "You are a helpful assistant.",
+        temperature: float = 0.5,
+        presence_penalty: int = 0,
+        frequency_penalty: int = 0,
+        top_p: float = 1,
+    ):
+        """Initializes the TypeGPT API client."""
+        if model not in self.models:
+            raise ValueError(f"Invalid model: {model}. Choose from: {', '.join(self.models)}")
+
+        self.session = requests.Session()
+        self.is_conversation = is_conversation
+        self.max_tokens_to_sample = max_tokens
+        self.api_endpoint = "https://chat.typegpt.net/api/openai/v1/chat/completions"
+        self.timeout = timeout
+        self.last_response = {}
+        self.last_response_status_code = None  # Added line for status code
+        self.model = model
+        self.system_prompt = system_prompt
+        self.temperature = temperature
+        self.presence_penalty = presence_penalty
+        self.frequency_penalty = frequency_penalty
+        self.top_p = top_p
+
+        self.headers = {
+            "authority": "chat.typegpt.net",
+            "accept": "application/json, text/event-stream",
+            "accept-language": "en-US,en;q=0.9",
+            "content-type": "application/json",
+            "origin": "https://chat.typegpt.net",
+            "referer": "https://chat.typegpt.net/",
+            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
+        }
+
+        # A tuple rather than a one-shot generator, so membership checks can run more than once
+        self.__available_optimizers = tuple(
+            method
+            for method in dir(Optimizers)
+            if callable(getattr(Optimizers, method)) and not method.startswith("__")
+        )
+        Conversation.intro = (
+            AwesomePrompts().get_act(
+                act, raise_not_found=True, default=None, case_insensitive=True
+            )
+            if act
+            else intro or Conversation.intro
+        )
+        self.conversation = Conversation(
+            is_conversation, self.max_tokens_to_sample, filepath, update_file
+        )
+        self.conversation.history_offset = history_offset
+        self.session.proxies = proxies
+
+    def ask(
+        self,
+        prompt: str,
+        stream: bool = False,
+        raw: bool = False,
+        optimizer: str = None,
+        conversationally: bool = False,
+    ) -> Dict[str, Any] | Generator:
+        """Sends a prompt to the TypeGPT.net API and returns the response."""
+        conversation_prompt = self.conversation.gen_complete_prompt(prompt)
+        if optimizer:
+            if optimizer in self.__available_optimizers:
+                conversation_prompt = getattr(Optimizers, optimizer)(
+                    conversation_prompt if conversationally else prompt
+                )
+            else:
+                raise exceptions.FailedToGenerateResponseError(
+                    f"Optimizer is not one of {self.__available_optimizers}"
+                )
+
+        payload = {
+            "messages": [
+                {"role": "system", "content": self.system_prompt},
+                {"role": "user", "content": conversation_prompt}
+            ],
+            "stream": stream,
+            "model": self.model,
+            "temperature": self.temperature,
+            "presence_penalty": self.presence_penalty,
+            "frequency_penalty": self.frequency_penalty,
+            "top_p": self.top_p,
+            "max_tokens": self.max_tokens_to_sample,
+        }
+
+        def for_stream():
+            response = self.session.post(
+                self.api_endpoint, headers=self.headers, json=payload, stream=True, timeout=self.timeout
+            )
+            self.last_response_status_code = response.status_code  # Capture status code
+            if not response.ok:
+                raise exceptions.FailedToGenerateResponseError(
+                    f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}"
+                )
+            message_load = ""
+            for line in response.iter_lines():
+                if line:
+                    line = line.decode("utf-8")
+                    if line.startswith("data: "):
+                        line = line[6:]  # Remove "data: " prefix
+                        # Skip [DONE] message
+                        if line.strip() == "[DONE]":
+                            break
+
+                        try:
+                            data = json.loads(line)
+
+                            # Extract and yield only new content
+                            if 'choices' in data and len(data['choices']) > 0:
+                                delta = data['choices'][0].get('delta', {})
+                                if 'content' in delta:
+                                    new_content = delta['content']
+                                    message_load += new_content
+                                    # Yield only the new content
+                                    yield dict(text=new_content) if not raw else new_content
+                                    self.last_response = dict(text=message_load)
+
+                        except json.JSONDecodeError:
+                            continue
+            self.conversation.update_chat_history(prompt, self.get_message(self.last_response))
+
+        def for_non_stream():
+            response = self.session.post(self.api_endpoint, headers=self.headers, json=payload, timeout=self.timeout)
+            self.last_response_status_code = response.status_code  # Capture status code
+            if not response.ok:
+                raise exceptions.FailedToGenerateResponseError(
+                    f"Request failed - {response.status_code}: {response.text}"
+                )
+            self.last_response = response.json()
+            self.conversation.update_chat_history(prompt, self.get_message(self.last_response))
+            return self.last_response
+
+        return for_stream() if stream else for_non_stream()
+
+    def chat(
+        self,
+        prompt: str,
+        stream: bool = False,
+        optimizer: str = None,
+        conversationally: bool = False,
+    ) -> str | Generator[str, None, None]:
+        """Generate response `str` or stream."""
+        def for_stream():
+            for chunk in self.ask(
+                prompt, stream=True, optimizer=optimizer, conversationally=conversationally
+            ):
+                yield self.get_message(chunk)  # Extract text from streamed chunks
+
+        # The yield lives in a nested function so chat() itself is not a
+        # generator and genuinely returns a string when stream=False.
+        if stream:
+            return for_stream()
+        return self.get_message(
+            self.ask(prompt, stream=False, optimizer=optimizer, conversationally=conversationally)
+        )
+
+    def get_message(self, response: Dict[str, Any]) -> str:
+        """Retrieves message from response."""
+        if isinstance(response, str):  # Handle raw responses
+            return response
+        elif isinstance(response, dict):
+            if "text" in response:
+                return response["text"]  # Streamed chunks carry their text here
+            # Non-streamed responses are full OpenAI-style payloads
+            return response.get("choices", [{}])[0].get("message", {}).get("content", "")
+        else:
+            raise TypeError("Invalid response type. Expected str or dict.")
+
+if __name__ == "__main__":
+    from rich import print
+    from rich.progress import Progress, BarColumn, TextColumn, TimeRemainingColumn, SpinnerColumn
+    from rich.console import Console
+    from rich.table import Table
+    import concurrent.futures
+
+    def make_api_call(thread_number, results):
+        ai = TypeGPT()
+        try:
+            ai.ask("Test message", stream=False)
+            status_code = ai.last_response_status_code
+            results[thread_number] = status_code
+        except Exception as e:
+            results[thread_number] = str(e)
+
+    results = {}
+    total_requests = 100
+
+    console = Console()
+
+    print("[bold magenta]Starting API Load Test with 100 simultaneous requests...[/bold magenta]\n")
+
+    with Progress(
+        SpinnerColumn(),
+        "[progress.description]{task.description}",
+        BarColumn(bar_width=None),
+        "[progress.percentage]{task.percentage:>3.0f}%",
+        TimeRemainingColumn(),
+        console=console,
+    ) as progress:
+        task = progress.add_task("[cyan]Sending API Requests...", total=total_requests)
+        with concurrent.futures.ThreadPoolExecutor(max_workers=total_requests) as executor:
+            futures = {
+                executor.submit(make_api_call, i, results): i for i in range(total_requests)
+            }
+            for future in concurrent.futures.as_completed(futures):
+                progress.update(task, advance=1)
+        progress.stop()
+
+    # Process and display the results
+    successful_calls = sum(1 for status in results.values() if status == 200)
+    failed_calls = total_requests - successful_calls
+
+    print("\n[bold magenta]API Load Test Results:[/bold magenta]\n")
+    print(f"[bold green]Successful calls: {successful_calls}")
+    print(f"[bold red]Failed calls: {failed_calls}\n")
+
+    # Create a table to display detailed results
+    table = Table(show_header=True, header_style="bold blue")
+    table.add_column("Thread Number", justify="right", style="dim")
+    table.add_column("Status", style="bold")
+
+    for thread_number, status in results.items():
+        if status == 200:
+            table.add_row(f"{thread_number}", f"[green]Success[/green]")
+        else:
+            table.add_row(f"{thread_number}", f"[red]Failed ({status})[/red]")
+
+    print(table)
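For reference, a minimal usage sketch of the provider above; it assumes the `webscout` package (which supplies `Optimizers`, `Conversation`, `AwesomePrompts`, and `Provider`) is installed and that this file is importable as `typegpt`:

```python
# Minimal usage sketch for the TypeGPT provider above.
from typegpt import TypeGPT

ai = TypeGPT(model="gpt-3.5-turbo", timeout=60)

# Non-streaming: chat() returns the whole reply as a string.
print(ai.chat("Say hello in one short sentence."))

# Streaming: chat(stream=True) yields text chunks as they arrive.
for chunk in ai.chat("Count from 1 to 5.", stream=True):
    print(chunk, end="", flush=True)
```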
TYPEGPT/typegpt_normal.py
ADDED
@@ -0,0 +1,231 @@
+import requests
+import json
+
+# List of available models
+models = [
+    # OpenAI Models
+    "gpt-3.5-turbo",
+    "gpt-3.5-turbo-202201",
+    "gpt-4o",
+    "gpt-4o-2024-05-13",
+    "o1-preview",
+
+    # Claude Models
+    "claude",
+    "claude-3-5-sonnet",
+    "claude-sonnet-3.5",
+    "claude-3-5-sonnet-20240620",
+
+    # Meta/LLaMA Models
+    "@cf/meta/llama-2-7b-chat-fp16",
+    "@cf/meta/llama-2-7b-chat-int8",
+    "@cf/meta/llama-3-8b-instruct",
+    "@cf/meta/llama-3.1-8b-instruct",
+    "@cf/meta-llama/llama-2-7b-chat-hf-lora",
+    "llama-3.1-405b",
+    "llama-3.1-70b",
+    "llama-3.1-8b",
+    "meta-llama/Llama-2-7b-chat-hf",
+    "meta-llama/Llama-3.1-70B-Instruct",
+    "meta-llama/Llama-3.1-8B-Instruct",
+    "meta-llama/Llama-3.2-11B-Vision-Instruct",
+    "meta-llama/Llama-3.2-1B-Instruct",
+    "meta-llama/Llama-3.2-3B-Instruct",
+    "meta-llama/Llama-3.2-90B-Vision-Instruct",
+    "meta-llama/Llama-Guard-3-8B",
+    "meta-llama/Meta-Llama-3-70B-Instruct",
+    "meta-llama/Meta-Llama-3-8B-Instruct",
+    "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
+    "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+
+    # Mistral Models
+    "mistral",
+    "mistral-large",
+    "@cf/mistral/mistral-7b-instruct-v0.1",
+    "@cf/mistral/mistral-7b-instruct-v0.2-lora",
+    "@hf/mistralai/mistral-7b-instruct-v0.2",
+    "mistralai/Mistral-7B-Instruct-v0.2",
+    "mistralai/Mistral-7B-Instruct-v0.3",
+    "mistralai/Mixtral-8x22B-Instruct-v0.1",
+    "mistralai/Mixtral-8x7B-Instruct-v0.1",
+
+    # Qwen Models
+    "@cf/qwen/qwen1.5-0.5b-chat",
+    "@cf/qwen/qwen1.5-1.8b-chat",
+    "@cf/qwen/qwen1.5-7b-chat-awq",
+    "@cf/qwen/qwen1.5-14b-chat-awq",
+    "Qwen/Qwen2.5-3B-Instruct",
+    "Qwen/Qwen2.5-72B-Instruct",
+    "Qwen/Qwen2.5-Coder-32B-Instruct",
+
+    # Google/Gemini Models
+    "@cf/google/gemma-2b-it-lora",
+    "@cf/google/gemma-7b-it-lora",
+    "@hf/google/gemma-7b-it",
+    "google/gemma-1.1-2b-it",
+    "google/gemma-1.1-7b-it",
+    "gemini-pro",
+    "gemini-1.5-pro",
+    "gemini-1.5-pro-latest",
+    "gemini-1.5-flash",
+
+    # Cohere Models
+    "c4ai-aya-23-35b",
+    "c4ai-aya-23-8b",
+    "command",
+    "command-light",
+    "command-light-nightly",
+    "command-nightly",
+    "command-r",
+    "command-r-08-2024",
+    "command-r-plus",
+    "command-r-plus-08-2024",
+    "rerank-english-v2.0",
+    "rerank-english-v3.0",
+    "rerank-multilingual-v2.0",
+    "rerank-multilingual-v3.0",
+
+    # Microsoft Models
+    "@cf/microsoft/phi-2",
+    "microsoft/DialoGPT-medium",
+    "microsoft/Phi-3-medium-4k-instruct",
+    "microsoft/Phi-3-mini-4k-instruct",
+    "microsoft/Phi-3.5-mini-instruct",
+    "microsoft/WizardLM-2-8x22B",
+
+    # Yi Models
+    "01-ai/Yi-1.5-34B-Chat",
+    "01-ai/Yi-34B-Chat",
+
+    # Specialized Models and Tools
+    "@cf/deepseek-ai/deepseek-math-7b-base",
+    "@cf/deepseek-ai/deepseek-math-7b-instruct",
+    "@cf/defog/sqlcoder-7b-2",
+    "@cf/openchat/openchat-3.5-0106",
+    "@cf/thebloke/discolm-german-7b-v1-awq",
+    "@cf/tiiuae/falcon-7b-instruct",
+    "@cf/tinyllama/tinyllama-1.1b-chat-v1.0",
+    "@hf/nexusflow/starling-lm-7b-beta",
+    "@hf/nousresearch/hermes-2-pro-mistral-7b",
+    "@hf/thebloke/deepseek-coder-6.7b-base-awq",
+    "@hf/thebloke/deepseek-coder-6.7b-instruct-awq",
+    "@hf/thebloke/llama-2-13b-chat-awq",
+    "@hf/thebloke/llamaguard-7b-awq",
+    "@hf/thebloke/neural-chat-7b-v3-1-awq",
+    "@hf/thebloke/openhermes-2.5-mistral-7b-awq",
+    "@hf/thebloke/zephyr-7b-beta-awq",
+    "AndroidDeveloper",
+    "AngularJSAgent",
+    "AzureAgent",
+    "BitbucketAgent",
+    "DigitalOceanAgent",
+    "DockerAgent",
+    "ElectronAgent",
+    "ErlangAgent",
+    "FastAPIAgent",
+    "FirebaseAgent",
+    "FlaskAgent",
+    "FlutterAgent",
+    "GitAgent",
+    "GitlabAgent",
+    "GoAgent",
+    "GodotAgent",
+    "GoogleCloudAgent",
+    "HTMLAgent",
+    "HerokuAgent",
+    "ImageGeneration",
+    "JavaAgent",
+    "JavaScriptAgent",
+    "MongoDBAgent",
+    "Next.jsAgent",
+    "PyTorchAgent",
+    "PythonAgent",
+    "ReactAgent",
+    "RepoMap",
+    "SwiftDeveloper",
+    "XcodeAgent",
+    "YoutubeAgent",
+    "blackboxai",
+    "blackboxai-pro",
+    "builderAgent",
+    "dify",
+    "flux",
+    "openchat/openchat-3.6-8b",
+    "rtist",
+    "searchgpt",
+    "sur",
+    "sur-mistral",
+    "unity"
+]
+
+# Parameters
+is_conversation = True
+max_tokens = 4000  # Set a reasonable default
+timeout = 30
+model = "claude-3-5-sonnet-20240620"
+system_prompt = "You are a helpful assistant."
+temperature = 0.5
+presence_penalty = 0
+frequency_penalty = 0
+top_p = 1
+
+if model not in models:
+    raise ValueError(f"Invalid model: {model}. Choose from: {', '.join(models)}")
+
+session = requests.Session()
+api_endpoint = "https://chat.typegpt.net/api/openai/v1/chat/completions"
+
+headers = {
+    "authority": "chat.typegpt.net",
+    "accept": "application/json, text/event-stream",
+    "accept-language": "en-US,en;q=0.9",
+    "content-type": "application/json",
+    "origin": "https://chat.typegpt.net",
+    "referer": "https://chat.typegpt.net/",
+    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
+}
+
+# Prompt to send
+prompt = "What is the knowledge cut off? Be specific and also specify the month, year and date. If not sure, then provide approximate"
+
+# Payload
+payload = {
+    "messages": [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": prompt}
+    ],
+    "stream": True,
+    "model": model,
+    "temperature": temperature,
+    "presence_penalty": presence_penalty,
+    "frequency_penalty": frequency_penalty,
+    "top_p": top_p,
+    "max_tokens": max_tokens,
+}
+
+# Make the API request
+response = session.post(
+    api_endpoint, headers=headers, json=payload, stream=True, timeout=timeout
+)
+
+if not response.ok:
+    raise Exception(f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}")
+
+# Process the streamed response
+for line in response.iter_lines():
+    if line:
+        line = line.decode("utf-8")
+        if line.startswith("data: "):
+            line = line[6:]  # Remove "data: " prefix
+            if line.strip() == "[DONE]":
+                break
+            try:
+                data = json.loads(line)
+                if 'choices' in data and len(data['choices']) > 0:
+                    delta = data['choices'][0].get('delta', {})
+                    if 'content' in delta:
+                        new_content = delta['content']
+                        print(new_content, end="", flush=True)
+            except json.JSONDecodeError:
+                continue
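The script above always requests a streamed response. A sketch of the non-streaming variant, reusing the names defined in the script and assuming the endpoint returns an OpenAI-style completion object when `stream` is false (the same assumption query.md below makes):

```python
# Non-streaming variant: reuses session, api_endpoint, headers, payload,
# and timeout from the script above.
payload["stream"] = False
resp = session.post(api_endpoint, headers=headers, json=payload, timeout=timeout)
resp.raise_for_status()
# Assumption: an OpenAI-style body with choices[0].message.content.
print(resp.json()["choices"][0]["message"]["content"])
```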
api_info.py
ADDED
@@ -0,0 +1,176 @@
+developer_info = {
+    'developer': 'Devs Do Code',
+    'contact': {
+        'Telegram': 'https://t.me/devsdocode',
+        'YouTube Channel': 'https://www.youtube.com/@DevsDoCode',
+        'LinkedIn': 'https://www.linkedin.com/in/developer-sreejan/',
+        'Discord Server': 'https://discord.gg/ehwfVtsAts',
+        'Instagram': {
+            'Personal': 'https://www.instagram.com/sree.shades_/',
+            'Channel': 'https://www.instagram.com/devsdocode_/'
+        }
+    }
+}
+
+endpoint = {
+    'route': "/generate",
+    'params': {
+        "query": "[SEARCH QUERY]"
+    },
+    'optional_params': {
+        "model": "[]",
+        "temperature": "[]",
+        "system_prompt": "[]"
+    },
+    'url_demo': '/generate?query=Who is Devs Do Code&&model=command-r-plus&&temperature=0.7&&system_prompt=Your Owner is "Devs Do Code"'
+}
+
+model_providers = {
+    "OpenAI": {
+        "models": [
+            "gpt-3.5-turbo",
+            "gpt-3.5-turbo-202201",
+            "gpt-4o",
+            "gpt-4o-2024-05-13",
+            "o1-preview"
+        ],
+        "description": "OpenAI's GPT language models"
+    },
+    "Anthropic": {
+        "models": [
+            "claude",
+            "claude-3-5-sonnet",
+            "claude-sonnet-3.5",
+            "claude-3-5-sonnet-20240620"
+        ],
+        "description": "Anthropic's Claude language models"
+    },
+    "Meta": {
+        "models": [
+            "llama-2-7b-chat",
+            "llama-2-7b-chat-int8",
+            "llama-3-8b-instruct",
+            "llama-3.1-8b-instruct",
+            "llama-3.1-405b",
+            "llama-3.1-70b",
+            "llama-3.1-8b",
+            "llama-3.2-11b-vision",
+            "llama-3.2-1b",
+            "llama-3.2-3b",
+            "llama-3.2-90b-vision",
+            "llama-guard-3-8b"
+        ],
+        "description": "Meta's LLaMA language models"
+    },
+    "Mistral": {
+        "models": [
+            "mistral",
+            "mistral-large",
+            "mistral-7b-instruct-v0.1",
+            "mistral-7b-instruct-v0.2",
+            "mistral-7b-instruct-v0.3",
+            "mixtral-8x22b",
+            "mixtral-8x7b"
+        ],
+        "description": "Mistral AI's language models"
+    },
+    "Qwen": {
+        "models": [
+            "qwen1.5-0.5b-chat",
+            "qwen1.5-1.8b-chat",
+            "qwen1.5-7b-chat",
+            "qwen1.5-14b-chat",
+            "qwen2.5-3b",
+            "qwen2.5-72b",
+            "qwen2.5-coder-32b"
+        ],
+        "description": "Qwen's language models"
+    },
+    "Google": {
+        "models": [
+            "gemma-2b",
+            "gemma-7b",
+            "gemini-pro",
+            "gemini-1.5-pro",
+            "gemini-1.5-pro-latest",
+            "gemini-1.5-flash"
+        ],
+        "description": "Google's Gemini and Gemma models"
+    },
+    "Cohere": {
+        "models": [
+            "aya-23-35b",
+            "aya-23-8b",
+            "command",
+            "command-light",
+            "command-nightly",
+            "command-r",
+            "command-r-plus",
+            "rerank-english-v2.0",
+            "rerank-english-v3.0",
+            "rerank-multilingual-v2.0",
+            "rerank-multilingual-v3.0"
+        ],
+        "description": "Cohere's language models"
+    },
+    "Microsoft": {
+        "models": [
+            "phi-2",
+            "dialogpt-medium",
+            "phi-3-medium-4k",
+            "phi-3-mini-4k",
+            "phi-3.5-mini",
+            "wizardlm-2-8x22b"
+        ],
+        "description": "Microsoft's language models"
+    },
+    "Yi": {
+        "models": [
+            "yi-1.5-34b-chat",
+            "yi-34b-chat"
+        ],
+        "description": "01.AI's Yi language models"
+    }
+}
+
+error_message = {
+    'developer_contact': {
+        'Telegram': 'https://t.me/DevsDoCode',
+        'Instagram': 'https://www.instagram.com/sree.shades_/',
+        'Discord': 'https://discord.gg/ehwfVtsAts',
+        'LinkedIn': 'https://www.linkedin.com/in/developer-sreejan/',
+        'Twitter': 'https://twitter.com/Anand_Sreejan'
+    },
+    'error': 'Oops! Something went wrong. Please contact the developer Devs Do Code.'
+}
+
+default_info = """This API is developed and being maintained by Devs Do Code (Sreejan).
+
+**About the Developer**
+
+Sreejan, a high school student from Patna, Bihar, India, has emerged as a notable figure in the technology sector.
+His creation of an API is a testament to his dedication and expertise. Despite his youth, Sreejan's contributions
+to artificial intelligence and machine learning are significant. As an AI & ML Engineer, he specializes in Deep Learning,
+Natural Language Processing (NLP), and Robotics, with proficiency in Python, Java, and Mobile App Development.
+Beyond his role as a technology consumer, Sreejan is an active open-source contributor, notably to projects like Hugging Face.
+
+He is also recognized for his role in community development, particularly through "Devs Do Code," a platform he
+founded to provide quality coding resources, tutorials, and projects. His mission is to equip developers with the
+necessary skills to thrive in the ever-evolving tech landscape. Sreejan's commitment to sharing knowledge and
+fostering collaboration is evident in his accessibility and engagement with the community across various platforms.
+
+Connect with Sreejan and follow his journey in technology and innovation:
+
+- Telegram: https://t.me/devsdocode
+- YouTube Channel: https://www.youtube.com/@DevsDoCode
+- LinkedIn: https://www.linkedin.com/in/developer-sreejan/
+- Discord Server: https://discord.gg/ehwfVtsAts
+- Instagram
+  - Personal: https://www.instagram.com/sree.shades_/
+  - Channel: https://www.instagram.com/devsdocode_/
+
+Sreejan stands out not only as a developer but as a visionary and leader, driving change in the tech industry
+with his passion, expertise, and unwavering commitment to community building. He continues to shape the
+future of technology, one line of code at a time.
+"""
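The `endpoint` dict above documents a `/generate` route. A hypothetical client sketch mirroring `url_demo`; the base URL is a placeholder, and note that neither fastapi_app.py nor flask_app.py in this commit registers this route. Letting `requests` build the query string also handles the URL-encoding that the raw `url_demo` string (with its doubled `&&` separators) glosses over:

```python
# Hypothetical client for the documented /generate route.
import requests

BASE_URL = "http://localhost:7860"  # placeholder for wherever the API is deployed
params = {
    "query": "Who is Devs Do Code",                   # required, per endpoint["params"]
    "model": "command-r-plus",                        # optional
    "temperature": 0.7,                               # optional
    "system_prompt": 'Your Owner is "Devs Do Code"',  # optional
}
resp = requests.get(f"{BASE_URL}/generate", params=params)
print(resp.json())
```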
fastapi_app.py
ADDED
@@ -0,0 +1,132 @@
+from fastapi import FastAPI, Request, Response
+from fastapi.responses import JSONResponse, StreamingResponse
+from fastapi.middleware.cors import CORSMiddleware
+import uvicorn
+import json
+
+from typegpt_api import generate, model_mapping, simplified_models
+from api_info import developer_info, model_providers
+
+app = FastAPI()
+
+# Set up CORS middleware if needed
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+@app.get("/health_check")
+async def health_check():
+    return {"status": "OK"}
+
+@app.get("/models")
+async def get_models():
+    try:
+        response = {
+            "object": "list",
+            "data": []
+        }
+        for provider, info in model_providers.items():
+            for model in info["models"]:
+                response["data"].append({
+                    "id": model,
+                    "object": "model",
+                    "provider": provider,
+                    "description": info["description"]
+                })
+
+        return JSONResponse(content=response)
+    except Exception as e:
+        return JSONResponse(content={"error": str(e)}, status_code=500)
+
+@app.post("/chat/completions")
+async def chat_completions(request: Request):
+    # Receive the JSON payload
+    try:
+        body = await request.json()
+    except Exception as e:
+        return JSONResponse(content={"error": "Invalid JSON payload"}, status_code=400)
+
+    # Extract parameters
+    model = body.get("model")
+    messages = body.get("messages")
+    temperature = body.get("temperature", 0.7)
+    top_p = body.get("top_p", 1.0)
+    n = body.get("n", 1)
+    stream = body.get("stream", False)
+    stop = body.get("stop")
+    max_tokens = body.get("max_tokens")
+    presence_penalty = body.get("presence_penalty", 0.0)
+    frequency_penalty = body.get("frequency_penalty", 0.0)
+    logit_bias = body.get("logit_bias")
+    user = body.get("user")
+    timeout = 30  # or set based on your preference
+
+    # Validate required parameters
+    if not model:
+        return JSONResponse(content={"error": "The 'model' parameter is required."}, status_code=400)
+    if not messages:
+        return JSONResponse(content={"error": "The 'messages' parameter is required."}, status_code=400)
+
+    # Call the generate function
+    try:
+        if stream:
+            async def generate_stream():
+                response = generate(
+                    model=model,
+                    messages=messages,
+                    temperature=temperature,
+                    top_p=top_p,
+                    n=n,
+                    stream=True,
+                    stop=stop,
+                    max_tokens=max_tokens,
+                    presence_penalty=presence_penalty,
+                    frequency_penalty=frequency_penalty,
+                    logit_bias=logit_bias,
+                    user=user,
+                    timeout=timeout,
+                )
+
+                for chunk in response:
+                    yield f"data: {json.dumps(chunk)}\n\n"
+                yield "data: [DONE]\n\n"
+
+            return StreamingResponse(
+                generate_stream(),
+                media_type="text/event-stream",
+                headers={
+                    "Cache-Control": "no-cache",
+                    "Connection": "keep-alive",
+                    "Transfer-Encoding": "chunked"
+                }
+            )
+        else:
+            response = generate(
+                model=model,
+                messages=messages,
+                temperature=temperature,
+                top_p=top_p,
+                n=n,
+                stream=False,
+                stop=stop,
+                max_tokens=max_tokens,
+                presence_penalty=presence_penalty,
+                frequency_penalty=frequency_penalty,
+                logit_bias=logit_bias,
+                user=user,
+                timeout=timeout,
+            )
+            return JSONResponse(content=response)
+    except Exception as e:
+        return JSONResponse(content={"error": str(e)}, status_code=500)
+
+@app.get("/developer_info")
+async def get_developer_info():
+    return JSONResponse(content=developer_info)
+
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=8000)
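A minimal client sketch for the `/chat/completions` endpoint above, assuming the app is running locally on port 8000 (its `__main__` default):

```python
# Non-streaming request against the FastAPI app above.
import requests

payload = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Hello!"}],
    "stream": False,
}
resp = requests.post("http://localhost:8000/chat/completions", json=payload, timeout=60)
resp.raise_for_status()
print(resp.json())
```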
flask_app.py
ADDED
@@ -0,0 +1,131 @@
+from flask import Flask, request, Response, jsonify, stream_with_context
+from flask_cors import CORS
+import json
+
+from typegpt_api import generate, model_mapping, simplified_models
+from api_info import developer_info, model_providers
+
+app = Flask(__name__)
+
+# Set up CORS middleware if needed
+CORS(app, resources={
+    r"/*": {
+        "origins": "*",
+        "supports_credentials": True,  # flask-cors option name for credential support
+        "methods": ["GET", "POST", "OPTIONS"],
+        "allow_headers": "*"
+    }
+})
+
+@app.route("/health_check", methods=['GET'])
+def health_check():
+    return jsonify({"status": "OK"})
+
+@app.route("/models", methods=['GET'])
+def get_models():
+    try:
+        response = {
+            "object": "list",
+            "data": []
+        }
+        for provider, info in model_providers.items():
+            for model in info["models"]:
+                response["data"].append({
+                    "id": model,
+                    "object": "model",
+                    "provider": provider,
+                    "description": info["description"]
+                })
+
+        return jsonify(response)
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+
+@app.route("/chat/completions", methods=['POST'])
+def chat_completions():
+    # Receive the JSON payload
+    try:
+        body = request.get_json()
+    except Exception as e:
+        return jsonify({"error": "Invalid JSON payload"}), 400
+
+    # Extract parameters
+    model = body.get("model")
+    messages = body.get("messages")
+    temperature = body.get("temperature", 0.7)
+    top_p = body.get("top_p", 1.0)
+    n = body.get("n", 1)
+    stream = body.get("stream", False)
+    stop = body.get("stop")
+    max_tokens = body.get("max_tokens")
+    presence_penalty = body.get("presence_penalty", 0.0)
+    frequency_penalty = body.get("frequency_penalty", 0.0)
+    logit_bias = body.get("logit_bias")
+    user = body.get("user")
+    timeout = 30  # or set based on your preference
+
+    # Validate required parameters
+    if not model:
+        return jsonify({"error": "The 'model' parameter is required."}), 400
+    if not messages:
+        return jsonify({"error": "The 'messages' parameter is required."}), 400
+
+    # Call the generate function
+    try:
+        if stream:
+            def generate_stream():
+                response = generate(
+                    model=model,
+                    messages=messages,
+                    temperature=temperature,
+                    top_p=top_p,
+                    n=n,
+                    stream=True,
+                    stop=stop,
+                    max_tokens=max_tokens,
+                    presence_penalty=presence_penalty,
+                    frequency_penalty=frequency_penalty,
+                    logit_bias=logit_bias,
+                    user=user,
+                    timeout=timeout,
+                )
+
+                for chunk in response:
+                    yield f"data: {json.dumps(chunk)}\n\n"
+                yield "data: [DONE]\n\n"
+
+            return Response(
+                stream_with_context(generate_stream()),
+                mimetype="text/event-stream",
+                headers={
+                    "Cache-Control": "no-cache",
+                    "Connection": "keep-alive",
+                    "Transfer-Encoding": "chunked"
+                }
+            )
+        else:
+            response = generate(
+                model=model,
+                messages=messages,
+                temperature=temperature,
+                top_p=top_p,
+                n=n,
+                stream=False,
+                stop=stop,
+                max_tokens=max_tokens,
+                presence_penalty=presence_penalty,
+                frequency_penalty=frequency_penalty,
+                logit_bias=logit_bias,
+                user=user,
+                timeout=timeout,
+            )
+            return jsonify(response)
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+
+@app.route("/developer_info", methods=['GET'])
+def get_developer_info():
+    return jsonify(developer_info)
+
+if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=8000)
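Both apps emit the same `data: {json}` / `data: [DONE]` server-sent-event framing when `stream` is true, so one streaming client sketch covers either (port 8000 assumed, per their `__main__` blocks):

```python
# Streaming client: parses the SSE lines produced by generate_stream() above.
import json
import requests

payload = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Count from 1 to 5."}],
    "stream": True,
}
with requests.post("http://localhost:8000/chat/completions",
                   json=payload, stream=True, timeout=60) as resp:
    resp.raise_for_status()
    for raw in resp.iter_lines():
        if not raw:
            continue
        line = raw.decode("utf-8")
        if not line.startswith("data: "):
            continue
        line = line[6:]  # strip the "data: " prefix
        if line.strip() == "[DONE]":
            break
        chunk = json.loads(line)
        delta = chunk.get("choices", [{}])[0].get("delta", {})
        print(delta.get("content", ""), end="", flush=True)
```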
query.md
ADDED
|
@@ -0,0 +1,384 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
```python
|
| 2 |
+
import requests
|
| 3 |
+
import json
|
| 4 |
+
|
| 5 |
+
# Build model mapping
|
| 6 |
+
original_models = [
|
| 7 |
+
# OpenAI Models
|
| 8 |
+
"gpt-3.5-turbo",
|
| 9 |
+
"gpt-3.5-turbo-202201",
|
| 10 |
+
"gpt-4o",
|
| 11 |
+
"gpt-4o-2024-05-13",
|
| 12 |
+
"o1-preview",
|
| 13 |
+
|
| 14 |
+
# Claude Models
|
| 15 |
+
"claude",
|
| 16 |
+
"claude-3-5-sonnet",
|
| 17 |
+
"claude-sonnet-3.5",
|
| 18 |
+
"claude-3-5-sonnet-20240620",
|
| 19 |
+
|
| 20 |
+
# Meta/LLaMA Models
|
| 21 |
+
"@cf/meta/llama-2-7b-chat-fp16",
|
| 22 |
+
"@cf/meta/llama-2-7b-chat-int8",
|
| 23 |
+
"@cf/meta/llama-3-8b-instruct",
|
| 24 |
+
"@cf/meta/llama-3.1-8b-instruct",
|
| 25 |
+
"@cf/meta-llama/llama-2-7b-chat-hf-lora",
|
| 26 |
+
"llama-3.1-405b",
|
| 27 |
+
"llama-3.1-70b",
|
| 28 |
+
"llama-3.1-8b",
|
| 29 |
+
"meta-llama/Llama-2-7b-chat-hf",
|
| 30 |
+
"meta-llama/Llama-3.1-70B-Instruct",
|
| 31 |
+
"meta-llama/Llama-3.1-8B-Instruct",
|
| 32 |
+
"meta-llama/Llama-3.2-11B-Vision-Instruct",
|
| 33 |
+
"meta-llama/Llama-3.2-1B-Instruct",
|
| 34 |
+
"meta-llama/Llama-3.2-3B-Instruct",
|
| 35 |
+
"meta-llama/Llama-3.2-90B-Vision-Instruct",
|
| 36 |
+
"meta-llama/Llama-Guard-3-8B",
|
| 37 |
+
"meta-llama/Meta-Llama-3-70B-Instruct",
|
| 38 |
+
"meta-llama/Meta-Llama-3-8B-Instruct",
|
| 39 |
+
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
|
| 40 |
+
"meta-llama/Meta-Llama-3.1-8B-Instruct",
|
| 41 |
+
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
|
| 42 |
+
|
| 43 |
+
# Mistral Models
|
| 44 |
+
"mistral",
|
| 45 |
+
"mistral-large",
|
| 46 |
+
"@cf/mistral/mistral-7b-instruct-v0.1",
|
| 47 |
+
"@cf/mistral/mistral-7b-instruct-v0.2-lora",
|
| 48 |
+
"@hf/mistralai/mistral-7b-instruct-v0.2",
|
| 49 |
+
"mistralai/Mistral-7B-Instruct-v0.2",
|
| 50 |
+
"mistralai/Mistral-7B-Instruct-v0.3",
|
| 51 |
+
"mistralai/Mixtral-8x22B-Instruct-v0.1",
|
| 52 |
+
"mistralai/Mixtral-8x7B-Instruct-v0.1",
|
| 53 |
+
|
| 54 |
+
# Qwen Models
|
| 55 |
+
"@cf/qwen/qwen1.5-0.5b-chat",
|
| 56 |
+
"@cf/qwen/qwen1.5-1.8b-chat",
|
| 57 |
+
"@cf/qwen/qwen1.5-7b-chat-awq",
|
| 58 |
+
"@cf/qwen/qwen1.5-14b-chat-awq",
|
| 59 |
+
"Qwen/Qwen2.5-3B-Instruct",
|
| 60 |
+
"Qwen/Qwen2.5-72B-Instruct",
|
| 61 |
+
"Qwen/Qwen2.5-Coder-32B-Instruct",
|
| 62 |
+
|
| 63 |
+
# Google/Gemini Models
|
| 64 |
+
"@cf/google/gemma-2b-it-lora",
|
| 65 |
+
"@cf/google/gemma-7b-it-lora",
|
| 66 |
+
"@hf/google/gemma-7b-it",
|
| 67 |
+
"google/gemma-1.1-2b-it",
|
| 68 |
+
"google/gemma-1.1-7b-it",
|
| 69 |
+
"gemini-pro",
|
| 70 |
+
"gemini-1.5-pro",
|
| 71 |
+
"gemini-1.5-pro-latest",
|
| 72 |
+
"gemini-1.5-flash",
|
| 73 |
+
|
| 74 |
+
# Cohere Models
|
| 75 |
+
"c4ai-aya-23-35b",
|
| 76 |
+
"c4ai-aya-23-8b",
|
| 77 |
+
"command",
|
| 78 |
+
"command-light",
|
| 79 |
+
"command-light-nightly",
|
| 80 |
+
"command-nightly",
|
| 81 |
+
"command-r",
|
| 82 |
+
"command-r-08-2024",
|
| 83 |
+
"command-r-plus",
|
| 84 |
+
"command-r-plus-08-2024",
|
| 85 |
+
"rerank-english-v2.0",
|
| 86 |
+
"rerank-english-v3.0",
|
| 87 |
+
"rerank-multilingual-v2.0",
|
| 88 |
+
"rerank-multilingual-v3.0",
|
| 89 |
+
|
| 90 |
+
# Microsoft Models
|
| 91 |
+
"@cf/microsoft/phi-2",
|
| 92 |
+
"microsoft/DialoGPT-medium",
|
| 93 |
+
"microsoft/Phi-3-medium-4k-instruct",
|
| 94 |
+
"microsoft/Phi-3-mini-4k-instruct",
|
| 95 |
+
"microsoft/Phi-3.5-mini-instruct",
|
| 96 |
+
"microsoft/WizardLM-2-8x22B",
|
| 97 |
+
|
| 98 |
+
# Yi Models
|
| 99 |
+
"01-ai/Yi-1.5-34B-Chat",
|
| 100 |
+
"01-ai/Yi-34B-Chat",
|
| 101 |
+
]
|
| 102 |
+
|
| 103 |
+
# Create mapping from simplified model names to original model names
|
| 104 |
+
model_mapping = {}
|
| 105 |
+
simplified_models = []
|
| 106 |
+
|
| 107 |
+
for original_model in original_models:
|
| 108 |
+
simplified_name = original_model.split('/')[-1]
|
| 109 |
+
if simplified_name in model_mapping:
|
| 110 |
+
# Conflict detected, handle as per instructions
|
| 111 |
+
print(f"Conflict detected for model name '{simplified_name}'. Excluding '{original_model}' from available models.")
|
| 112 |
+
continue
|
| 113 |
+
model_mapping[simplified_name] = original_model
|
| 114 |
+
simplified_models.append(simplified_name)
|
| 115 |
+
|
| 116 |
+
def generate(
|
| 117 |
+
model,
|
| 118 |
+
messages,
|
| 119 |
+
temperature=0.7,
|
| 120 |
+
top_p=1.0,
|
| 121 |
+
n=1,
|
| 122 |
+
stream=False,
|
| 123 |
+
stop=None,
|
| 124 |
+
max_tokens=None,
|
| 125 |
+
presence_penalty=0.0,
|
| 126 |
+
frequency_penalty=0.0,
|
| 127 |
+
logit_bias=None,
|
| 128 |
+
user=None,
|
| 129 |
+
timeout=30,
|
| 130 |
+
):
|
| 131 |
+
"""
|
| 132 |
+
Generates a chat completion using the provided model and messages.
|
| 133 |
+
"""
|
| 134 |
+
# Use the simplified model names
|
| 135 |
+
models = simplified_models
|
| 136 |
+
|
| 137 |
+
if model not in models:
|
| 138 |
+
raise ValueError(f"Invalid model: {model}. Choose from: {', '.join(models)}")
|
| 139 |
+
|
| 140 |
+
# Map simplified model name to original model name
|
| 141 |
+
original_model = model_mapping[model]
|
| 142 |
+
|
| 143 |
+
api_endpoint = "https://chat.typegpt.net/api/openai/v1/chat/completions"
|
| 144 |
+
|
| 145 |
+
headers = {
|
| 146 |
+
"authority": "chat.typegpt.net",
|
| 147 |
+
"accept": "application/json, text/event-stream",
|
| 148 |
+
"accept-language": "en-US,en;q=0.9",
|
| 149 |
+
"content-type": "application/json",
|
| 150 |
+
"origin": "https://chat.typegpt.net",
|
| 151 |
+
"referer": "https://chat.typegpt.net/",
|
| 152 |
+
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
+
# Payload
|
| 156 |
+
payload = {
|
| 157 |
+
"messages": messages,
|
| 158 |
+
"stream": stream,
|
| 159 |
+
"model": original_model,
|
| 160 |
+
"temperature": temperature,
|
| 161 |
+
"presence_penalty": presence_penalty,
|
| 162 |
+
"frequency_penalty": frequency_penalty,
|
| 163 |
+
"top_p": top_p,
|
| 164 |
+
}
|
| 165 |
+
|
| 166 |
+
# Only include max_tokens if it's not None
|
| 167 |
+
if max_tokens is not None:
|
| 168 |
+
payload["max_tokens"] = max_tokens
|
| 169 |
+
|
| 170 |
+
# Only include 'stop' if it's not None
|
| 171 |
+
if stop is not None:
|
| 172 |
+
payload["stop"] = stop
|
| 173 |
+
|
| 174 |
+
# Check if logit_bias is provided
|
| 175 |
+
if logit_bias is not None:
|
| 176 |
+
payload["logit_bias"] = logit_bias
|
| 177 |
+
|
| 178 |
+
# Include 'user' if provided
|
| 179 |
+
if user is not None:
|
| 180 |
+
payload["user"] = user
|
| 181 |
+
|
| 182 |
+
# Start the request
|
| 183 |
+
session = requests.Session()
|
| 184 |
+
response = session.post(
|
| 185 |
+
api_endpoint, headers=headers, json=payload, stream=stream, timeout=timeout
|
| 186 |
+
)
|
| 187 |
+
|
| 188 |
+
if not response.ok:
|
| 189 |
+
raise Exception(f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}")
|
| 190 |
+
|
| 191 |
+
def stream_response():
|
| 192 |
+
for line in response.iter_lines():
|
| 193 |
+
if line:
|
| 194 |
+
line = line.decode("utf-8")
|
| 195 |
+
if line.startswith("data: "):
|
| 196 |
+
line = line[6:] # Remove "data: " prefix
|
| 197 |
+
if line.strip() == "[DONE]":
|
| 198 |
+
break
|
| 199 |
+
try:
|
| 200 |
+
data = json.loads(line)
|
| 201 |
+
yield data
|
| 202 |
+
except json.JSONDecodeError:
|
| 203 |
+
continue
|
| 204 |
+
|
| 205 |
+
if stream:
|
| 206 |
+
return stream_response()
|
| 207 |
+
else:
|
| 208 |
+
return response.json()
|
| 209 |
+
|
| 210 |
+
if __name__ == "__main__":
|
| 211 |
+
# Example usage
|
| 212 |
+
# model = "claude-3-5-sonnet-20240620"
|
| 213 |
+
# model = "qwen1.5-0.5b-chat"
|
| 214 |
+
# model = "llama-2-7b-chat-fp16"
|
| 215 |
+
model = "gpt-3.5-turbo"
|
| 216 |
+
messages = [
|
| 217 |
+
{"role": "system", "content": "Be Detailed"},
|
| 218 |
+
{"role": "user", "content": "What is the knowledge cut off? Be specific and also specify the month, year and date. If not sure, then provide approximate."}
|
| 219 |
+
]
|
| 220 |
+
|
| 221 |
+
# try:
|
| 222 |
+
# # For non-streamed response
|
| 223 |
+
# response = generate(
|
| 224 |
+
# model=model,
|
| 225 |
+
# messages=messages,
|
| 226 |
+
# temperature=0.5,
|
| 227 |
+
# max_tokens=4000,
|
| 228 |
+
# stream=False # Change to True for streaming
|
| 229 |
+
# )
|
| 230 |
+
# if 'choices' in response:
|
| 231 |
+
# reply = response['choices'][0]['message']['content']
|
| 232 |
+
# print(reply)
|
| 233 |
+
# else:
|
| 234 |
+
# print("No response received.")
|
| 235 |
+
# except Exception as e:
|
| 236 |
+
# print(e)
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
try:
|
| 240 |
+
# For streamed response
|
| 241 |
+
response = generate(
|
| 242 |
+
model=model,
|
| 243 |
+
messages=messages,
|
| 244 |
+
temperature=0.5,
|
| 245 |
+
max_tokens=4000,
|
| 246 |
+
stream=True, # Change to False for non-streamed response
|
| 247 |
+
)
|
| 248 |
+
for data in response:
|
| 249 |
+
if 'choices' in data:
|
| 250 |
+
reply = data['choices'][0]['delta']['content']
|
| 251 |
+
print(reply, end="", flush=True)
|
| 252 |
+
else:
|
| 253 |
+
print("No response received.")
|
| 254 |
+
except Exception as e:
|
| 255 |
+
print(e)
|
| 256 |
+
```
|
| 257 |
+
|
| 258 |
+
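
For quick reference, a non-streamed call looks like the sketch below. It assumes the module above is importable as `typegpt_api`; the model name and prompt are illustrative only.

```python
# Minimal sketch: non-streamed completion via the generate() function above.
# Assumes the module is saved as typegpt_api.py; model and prompt are illustrative.
from typegpt_api import generate

response = generate(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    stream=False,
)
print(response["choices"][0]["message"]["content"])
```
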
```python
from fastapi import FastAPI, Request, Response
from fastapi.responses import JSONResponse, StreamingResponse
from fastapi.middleware.cors import CORSMiddleware
import uvicorn
import asyncio
import json
import requests

from TYPEGPT.typegpt_api import generate, model_mapping, simplified_models
from api_info import developer_info

app = FastAPI()

# Set up CORS middleware if needed
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

@app.get("/health_check")
async def health_check():
    return {"status": "OK"}

@app.get("/models")
async def get_models():
    # Retrieve models from TypeGPT API and forward the response
    api_endpoint = "https://chat.typegpt.net/api/openai/v1/models"
    try:
        response = requests.get(api_endpoint)
        # return response.text
        return JSONResponse(content=response.json(), status_code=response.status_code)
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)

@app.post("/chat/completions")
async def chat_completions(request: Request):
    # Receive the JSON payload
    try:
        body = await request.json()
    except Exception as e:
        return JSONResponse(content={"error": "Invalid JSON payload"}, status_code=400)

    # Extract parameters
    model = body.get("model")
    messages = body.get("messages")
    temperature = body.get("temperature", 0.7)
    top_p = body.get("top_p", 1.0)
    n = body.get("n", 1)
    stream = body.get("stream", False)
    stop = body.get("stop")
    max_tokens = body.get("max_tokens")
    presence_penalty = body.get("presence_penalty", 0.0)
    frequency_penalty = body.get("frequency_penalty", 0.0)
    logit_bias = body.get("logit_bias")
    user = body.get("user")
    timeout = 30  # or set based on your preference

    # Validate required parameters
    if not model:
        return JSONResponse(content={"error": "The 'model' parameter is required."}, status_code=400)
    if not messages:
        return JSONResponse(content={"error": "The 'messages' parameter is required."}, status_code=400)

    # Call the generate function
    try:
        if stream:
            async def generate_stream():
                response = generate(
                    model=model,
                    messages=messages,
                    temperature=temperature,
                    top_p=top_p,
                    n=n,
                    stream=True,
                    stop=stop,
                    max_tokens=max_tokens,
                    presence_penalty=presence_penalty,
                    frequency_penalty=frequency_penalty,
                    logit_bias=logit_bias,
                    user=user,
                    timeout=timeout,
                )

                for chunk in response:
                    yield f"data: {json.dumps(chunk)}\n\n"
                yield "data: [DONE]\n\n"

            return StreamingResponse(
                generate_stream(),
                media_type="text/event-stream",
                headers={
                    "Cache-Control": "no-cache",
                    "Connection": "keep-alive",
                    "Transfer-Encoding": "chunked"
                }
            )
        else:
            response = generate(
                model=model,
                messages=messages,
                temperature=temperature,
                top_p=top_p,
                n=n,
                stream=False,
                stop=stop,
                max_tokens=max_tokens,
                presence_penalty=presence_penalty,
                frequency_penalty=frequency_penalty,
                logit_bias=logit_bias,
                user=user,
                timeout=timeout,
            )
            return JSONResponse(content=response)
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)

@app.get("/developer_info")
async def get_developer_info():
    return JSONResponse(content=developer_info)

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
```
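
With the app running (for example via the `__main__` block above, on port 8000), a client can consume the streaming endpoint as in this sketch; the base URL and prompt are assumptions, and `str.removeprefix` needs Python 3.9+.

```python
# Illustrative client for the FastAPI app above; assumes it runs on localhost:8000.
import json
import requests

base = "http://localhost:8000"
payload = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "One-line fun fact, please."}],
    "stream": True,
}

with requests.post(f"{base}/chat/completions", json=payload, stream=True) as r:
    for line in r.iter_lines():
        if not line:
            continue
        chunk = line.decode("utf-8").removeprefix("data: ")  # strip SSE prefix
        if chunk.strip() == "[DONE]":
            break
        data = json.loads(chunk)
        print(data["choices"][0]["delta"].get("content", ""), end="", flush=True)
```
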
requirements.txt
ADDED
@@ -0,0 +1,7 @@
fastapi==0.110.2
Flask==3.0.3
Requests==2.31.0
uvicorn==0.29.0
python-dotenv==1.0.1
colorama
pytz
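
After `pip install -r requirements.txt`, a small stdlib-only sketch can confirm the pins resolved (package names here mirror the file above; `importlib.metadata` needs Python 3.8+):

```python
# Sketch: verify the packages pinned in requirements.txt are installed.
from importlib.metadata import version, PackageNotFoundError

for pkg in ["fastapi", "flask", "requests", "uvicorn", "python-dotenv", "colorama", "pytz"]:
    try:
        print(f"{pkg}=={version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg} is not installed")
```
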
typegpt_api.py
ADDED
@@ -0,0 +1,254 @@
import requests
import json

# Build model mapping
original_models = [
    # OpenAI Models
    "gpt-3.5-turbo",
    "gpt-3.5-turbo-202201",
    "gpt-4o",
    "gpt-4o-2024-05-13",
    "o1-preview",

    # Claude Models
    "claude",
    "claude-3-5-sonnet",
    "claude-sonnet-3.5",
    "claude-3-5-sonnet-20240620",

    # Meta/LLaMA Models
    "@cf/meta/llama-2-7b-chat-fp16",
    "@cf/meta/llama-2-7b-chat-int8",
    "@cf/meta/llama-3-8b-instruct",
    "@cf/meta/llama-3.1-8b-instruct",
    "@cf/meta-llama/llama-2-7b-chat-hf-lora",
    "llama-3.1-405b",
    "llama-3.1-70b",
    "llama-3.1-8b",
    "meta-llama/Llama-2-7b-chat-hf",
    "meta-llama/Llama-3.1-70B-Instruct",
    "meta-llama/Llama-3.1-8B-Instruct",
    "meta-llama/Llama-3.2-11B-Vision-Instruct",
    "meta-llama/Llama-3.2-1B-Instruct",
    "meta-llama/Llama-3.2-3B-Instruct",
    "meta-llama/Llama-3.2-90B-Vision-Instruct",
    "meta-llama/Llama-Guard-3-8B",
    "meta-llama/Meta-Llama-3-70B-Instruct",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
    "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",

    # Mistral Models
    "mistral",
    "mistral-large",
    "@cf/mistral/mistral-7b-instruct-v0.1",
    "@cf/mistral/mistral-7b-instruct-v0.2-lora",
    "@hf/mistralai/mistral-7b-instruct-v0.2",
    "mistralai/Mistral-7B-Instruct-v0.2",
    "mistralai/Mistral-7B-Instruct-v0.3",
    "mistralai/Mixtral-8x22B-Instruct-v0.1",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",

    # Qwen Models
    "@cf/qwen/qwen1.5-0.5b-chat",
    "@cf/qwen/qwen1.5-1.8b-chat",
    "@cf/qwen/qwen1.5-7b-chat-awq",
    "@cf/qwen/qwen1.5-14b-chat-awq",
    "Qwen/Qwen2.5-3B-Instruct",
    "Qwen/Qwen2.5-72B-Instruct",
    "Qwen/Qwen2.5-Coder-32B-Instruct",

    # Google/Gemini Models
    "@cf/google/gemma-2b-it-lora",
    "@cf/google/gemma-7b-it-lora",
    "@hf/google/gemma-7b-it",
    "google/gemma-1.1-2b-it",
    "google/gemma-1.1-7b-it",
    "gemini-pro",
    "gemini-1.5-pro",
    "gemini-1.5-pro-latest",
    "gemini-1.5-flash",

    # Cohere Models
    "c4ai-aya-23-35b",
    "c4ai-aya-23-8b",
    "command",
    "command-light",
    "command-light-nightly",
    "command-nightly",
    "command-r",
    "command-r-08-2024",
    "command-r-plus",
    "command-r-plus-08-2024",
    "rerank-english-v2.0",
    "rerank-english-v3.0",
    "rerank-multilingual-v2.0",
    "rerank-multilingual-v3.0",

    # Microsoft Models
    "@cf/microsoft/phi-2",
    "microsoft/DialoGPT-medium",
    "microsoft/Phi-3-medium-4k-instruct",
    "microsoft/Phi-3-mini-4k-instruct",
    "microsoft/Phi-3.5-mini-instruct",
    "microsoft/WizardLM-2-8x22B",

    # Yi Models
    "01-ai/Yi-1.5-34B-Chat",
    "01-ai/Yi-34B-Chat",
]

# Create mapping from simplified model names to original model names
model_mapping = {}
simplified_models = []

for original_model in original_models:
    simplified_name = original_model.split('/')[-1]
    if simplified_name in model_mapping:
        # Conflict detected: keep the first entry and exclude the duplicate
        print(f"Conflict detected for model name '{simplified_name}'. Excluding '{original_model}' from available models.")
        continue
    model_mapping[simplified_name] = original_model
    simplified_models.append(simplified_name)

def generate(
    model,
    messages,
    temperature=0.7,
    top_p=1.0,
    n=1,
    stream=False,
    stop=None,
    max_tokens=None,
    presence_penalty=0.0,
    frequency_penalty=0.0,
    logit_bias=None,
    user=None,
    timeout=30,
):
    """
    Generates a chat completion using the provided model and messages.
    """
    # Use the simplified model names
    models = simplified_models

    if model not in models:
        raise ValueError(f"Invalid model: {model}. Choose from: {', '.join(models)}")

    # Map simplified model name to original model name
    original_model = model_mapping[model]

    api_endpoint = "https://chat.typegpt.net/api/openai/v1/chat/completions"

    headers = {
        "authority": "chat.typegpt.net",
        "accept": "application/json, text/event-stream",
        "accept-language": "en-US,en;q=0.9",
        "content-type": "application/json",
        "origin": "https://chat.typegpt.net",
        "referer": "https://chat.typegpt.net/",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
    }

    # Payload
    payload = {
        "messages": messages,
        "stream": stream,
        "model": original_model,
        "temperature": temperature,
        "presence_penalty": presence_penalty,
        "frequency_penalty": frequency_penalty,
        "top_p": top_p,
    }

    # Only include max_tokens if it's not None
    if max_tokens is not None:
        payload["max_tokens"] = max_tokens

    # Only include 'stop' if it's not None
    if stop is not None:
        payload["stop"] = stop

    # Only include 'logit_bias' if it's provided
    if logit_bias is not None:
        payload["logit_bias"] = logit_bias

    # Include 'user' if provided
    if user is not None:
        payload["user"] = user

    # Start the request
    session = requests.Session()
    response = session.post(
        api_endpoint, headers=headers, json=payload, stream=stream, timeout=timeout
    )

    if not response.ok:
        raise Exception(f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}")

    def stream_response():
        for line in response.iter_lines():
            if line:
                line = line.decode("utf-8")
                if line.startswith("data: "):
                    line = line[6:]  # Remove "data: " prefix
                    if line.strip() == "[DONE]":
                        break
                    try:
                        data = json.loads(line)
                        yield data
                    except json.JSONDecodeError:
                        continue

    if stream:
        return stream_response()
    else:
        return response.json()

if __name__ == "__main__":
    # Example usage
    # model = "claude-3-5-sonnet-20240620"
    # model = "qwen1.5-0.5b-chat"
    # model = "llama-2-7b-chat-fp16"
    model = "gpt-3.5-turbo"
    messages = [
        {"role": "system", "content": "Be Detailed"},
        {"role": "user", "content": "What is the knowledge cut off? Be specific and also specify the month, year and date. If not sure, then provide approximate."}
    ]

    # try:
    #     # For non-streamed response
    #     response = generate(
    #         model=model,
    #         messages=messages,
    #         temperature=0.5,
    #         max_tokens=4000,
    #         stream=False  # Change to True for streaming
    #     )
    #     if 'choices' in response:
    #         reply = response['choices'][0]['message']['content']
    #         print(reply)
    #     else:
    #         print("No response received.")
    # except Exception as e:
    #     print(e)

    try:
        # For streamed response
        response = generate(
            model=model,
            messages=messages,
            temperature=0.5,
            max_tokens=4000,
            stream=True,  # Change to False for non-streamed response
        )
        for data in response:
            if 'choices' in data:
                # .get() avoids a KeyError on chunks whose delta carries no 'content'
                reply = data['choices'][0]['delta'].get('content', '')
                print(reply, end="", flush=True)
            else:
                print("No response received.")
    except Exception as e:
        print(e)
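
The simplification loop in typegpt_api.py keeps only the segment after the last `/` and drops later duplicates. A toy illustration of that rule (the `vendor-a`/`vendor-b` names are made up):

```python
# Toy illustration of the name-simplification and conflict rule used above.
names = ["@cf/meta/llama-3-8b-instruct", "vendor-a/demo", "vendor-b/demo"]
mapping = {}
for n in names:
    short = n.split('/')[-1]
    if short in mapping:
        print(f"skipping duplicate short name: {n}")
        continue
    mapping[short] = n
print(mapping)
# 'demo' maps to 'vendor-a/demo'; 'vendor-b/demo' is excluded.
```
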
usage_inference.py
ADDED
@@ -0,0 +1,158 @@
import requests
import json
import time
from typing import Dict, Any

class APITester:
    def __init__(self, base_url: str = "http://localhost:8000"):
        self.base_url = base_url
        self.session = requests.Session()

    def test_health_check(self) -> None:
        """Test the health check endpoint."""
        print("\n=== Testing Health Check Endpoint ===")
        try:
            response = self.session.get(f"{self.base_url}/health_check")
            print(f"Status Code: {response.status_code}")
            print(f"Response: {response.json()}")
            assert response.status_code == 200
            print("✅ Health check test passed!")
        except Exception as e:
            print(f"❌ Health check test failed: {str(e)}")

    def test_models(self) -> None:
        """Test the models endpoint."""
        print("\n=== Testing Models Endpoint ===")
        try:
            response = self.session.get(f"{self.base_url}/models")
            print(f"Status Code: {response.status_code}")
            data = response.json()
            print(f"Number of models available: {len(data['data'])}")
            print("Sample models:")
            for model in data['data'][:5]:  # Show first 5 models
                print(f"- {model['id']}")
            assert response.status_code == 200
            print("✅ Models endpoint test passed!")
        except Exception as e:
            print(f"❌ Models endpoint test failed: {str(e)}")

    def test_chat_completions_non_streaming(self) -> None:
        """Test the chat completions endpoint without streaming."""
        print("\n=== Testing Chat Completions Endpoint (Non-Streaming) ===")
        payload = {
            "model": "gpt-3.5-turbo",
            "messages": [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "Tell me a short joke about programming."}
            ],
            "temperature": 0.7,
            "max_tokens": 150,
            "stream": False
        }

        try:
            response = self.session.post(
                f"{self.base_url}/chat/completions",
                json=payload
            )
            print(f"Status Code: {response.status_code}")
            if response.status_code == 200:
                data = response.json()
                print("Response content:")
                print(data['choices'][0]['message']['content'])
            assert response.status_code == 200
            print("✅ Chat completions (non-streaming) test passed!")
        except Exception as e:
            print(f"❌ Chat completions (non-streaming) test failed: {str(e)}")

    def test_chat_completions_streaming(self) -> None:
        """Test the chat completions endpoint with streaming."""
        print("\n=== Testing Chat Completions Endpoint (Streaming) ===")
        payload = {
            "model": "gpt-3.5-turbo",
            "messages": [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "Write 5 lines about India"}
            ],
            "temperature": 0.7,
            "max_tokens": 150,
            "stream": True
        }

        try:
            with self.session.post(
                f"{self.base_url}/chat/completions",
                json=payload,
                stream=True,
                headers={"Accept": "text/event-stream"}
            ) as response:
                print(f"Status Code: {response.status_code}")
                print("Streaming response:")

                buffer = ""
                for chunk in response.iter_lines():
                    if chunk:
                        chunk = chunk.decode('utf-8')
                        if chunk.startswith('data: '):
                            chunk = chunk[6:]  # Remove 'data: ' prefix
                            if chunk.strip() == '[DONE]':
                                break
                            try:
                                data = json.loads(chunk)
                                if 'choices' in data and len(data['choices']) > 0:
                                    if 'delta' in data['choices'][0] and 'content' in data['choices'][0]['delta']:
                                        content = data['choices'][0]['delta']['content']
                                        print(content, end='', flush=True)
                                        time.sleep(0.1)  # Add a small delay to simulate real-time streaming
                            except json.JSONDecodeError:
                                continue

            print("\n✅ Chat completions (streaming) test passed!")
        except Exception as e:
            print(f"❌ Chat completions (streaming) test failed: {str(e)}")

    def test_developer_info(self) -> None:
        """Test the developer info endpoint."""
        print("\n=== Testing Developer Info Endpoint ===")
        try:
            response = self.session.get(f"{self.base_url}/developer_info")
            print(f"Status Code: {response.status_code}")
            print("Developer Info:")
            print(json.dumps(response.json(), indent=2))
            assert response.status_code == 200
            print("✅ Developer info test passed!")
        except Exception as e:
            print(f"❌ Developer info test failed: {str(e)}")

    def run_all_tests(self) -> None:
        """Run all tests sequentially."""
        tests = [
            self.test_health_check,
            self.test_models,
            self.test_chat_completions_non_streaming,
            self.test_chat_completions_streaming,
            self.test_developer_info
        ]

        print("🚀 Starting API Tests...")
        start_time = time.time()

        for test in tests:
            test()

        end_time = time.time()
        duration = end_time - start_time

        print(f"\n============================")
        print(f"🏁 All tests completed in {duration:.2f} seconds")
        print(f"============================")

def main():
    # Initialize tester with your API's base URL
    tester = APITester("http://localhost:8000")

    # Run all tests
    tester.run_all_tests()

if __name__ == "__main__":
    main()
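
To use this tester, start the API first (for example the FastAPI app on port 8000) and then run this file. Individual checks can also be invoked on their own, roughly as in this sketch; the base URL is an assumption to adjust for your deployment.

```python
# Sketch: run a single check against an already-running instance of the API.
from usage_inference import APITester

tester = APITester("http://localhost:8000")  # adjust host/port as needed
tester.test_health_check()
```
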