# MonsterAPIClient.py

"""
Monster API Python client to connect to LLM models on monsterapi.

Base URL: https://api.monsterapi.ai/v1/generate/{model}

Available models:
-----------------
LLMs:
    1. falcon-7b-instruct
    2. falcon-40b-instruct
    3. mpt-30B-instruct
    4. mpt-7b-instruct
    5. openllama-13b-base
    6. llama2-7b-chat

Text to Image:
    1. stable-diffusion v1.5 (endpoint: txt2img)
    2. stable-diffusion XL v1.0 (endpoint: sdxl-base)
"""
import os
import time
import logging

import requests
from requests_toolbelt.multipart.encoder import MultipartEncoder

from typing import Optional, Literal
from pydantic import BaseModel, Field

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class LLMInputModel1(BaseModel):
    """
    Supports the following models: falcon-7b-instruct, falcon-40b-instruct, openllama-13b-base, llama2-7b-chat

    prompt (str): Textual instruction for the model to produce an output. Required.
    top_k (int): Top-k sampling helps improve quality by removing the tail and making it less likely to go off topic. Optional (default: 40).
    top_p (float): Top-p sampling helps generate more diverse and creative text by considering a broader range of tokens. Allowed range: 0-1. Optional (default: 0.9).
    temp (float): The temperature influences the randomness of the next token predictions. Allowed range: 0-1. Optional (default: 0.98).
    max_length (int): The maximum length of the generated text. Optional (default: 256).
    repetition_penalty (float): Penalty used to discourage the repetition of tokens in the output. Optional (default: 1.2).
    beam_size (int): The beam size for beam search. A larger beam size gives better-quality output at the cost of slower generation. Optional (default: 1).
    """
    prompt: str
    top_k: int = 40
    top_p: float = Field(0.9, ge=0., le=1.)
    temp: float = Field(0.98, ge=0., le=1.)
    max_length: int = 256
    repetition_penalty: float = 1.2
    beam_size: int = 1
class LLMInputModel2(BaseModel):
    """
    Supports the following models: mpt-30B-instruct, mpt-7b-instruct

    prompt (str): Textual instruction for the model to produce an output. Required.
    top_k (int): Top-k sampling helps improve quality by removing the tail and making it less likely to go off topic. Optional (default: 40).
    top_p (float): Top-p sampling helps generate more diverse and creative text by considering a broader range of tokens. Allowed range: 0-1. Optional (default: 0.9).
    temp (float): Controls the randomness of the model's output; the higher the temperature, the more random the output. Optional (default: 0.98).
    max_length (int): Maximum length of the generated output. Optional (default: 256).
    """
    prompt: str
    top_k: int = 40
    top_p: float = Field(0.9, ge=0., le=1.)
    temp: float = Field(0.98, ge=0., le=1.)
    max_length: int = 256
class SDInputModel(BaseModel):
    """
    Supports the following models: txt2img, sdxl-base

    prompt (str): Your input text prompt. Required.
    negprompt (str): Negative text prompt. Optional.
    samples (int): Number of images to be generated. Allowed range: 1-4. Optional (default: 1).
    steps (int): Sampling steps per image. Allowed range: 30-500. Optional (default: 30).
    aspect_ratio (str): Allowed values: square, landscape, portrait. Optional (default: square).
    guidance_scale (float): Prompt guidance scale. Optional (default: 7.5).
    seed (int): Random number used to initialize the image generation. Optional (default: random).
    """
    prompt: str
    negprompt: Optional[str] = ""
    samples: Optional[int] = Field(1, ge=1, le=4)
    steps: Optional[int] = Field(30, ge=30, le=500)
    aspect_ratio: Optional[Literal['square', 'landscape', 'portrait']] = 'square'
    guidance_scale: Optional[float] = 7.5
    seed: Optional[int] = None
MODELS_TO_DATAMODEL = {
    'falcon-7b-instruct': LLMInputModel1,
    'falcon-40b-instruct': LLMInputModel1,
    'mpt-30B-instruct': LLMInputModel2,
    'mpt-7b-instruct': LLMInputModel2,
    'openllama-13b-base': LLMInputModel1,
    'llama2-7b-chat': LLMInputModel1,
    'sdxl-base': SDInputModel,
    'txt2img': SDInputModel,
}
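
# A minimal sketch of how the data models behave (prompt value is hypothetical):
# pydantic validates the payload and fills in defaults, so a partial dict suffices:
#   LLMInputModel1(prompt="Write a haiku about the sea").dict()
#   -> {'prompt': 'Write a haiku about the sea', 'top_k': 40, 'top_p': 0.9,
#       'temp': 0.98, 'max_length': 256, 'repetition_penalty': 1.2, 'beam_size': 1}
# Out-of-range values (e.g. top_p=1.5) raise a pydantic ValidationError.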
class MClient:
    def __init__(self):
        self.boundary = '---011000010111000001101001'
        self.auth_token = os.environ.get('MONSTER_API_KEY')
        if not self.auth_token:
            # Fail early with a clear message instead of a TypeError below.
            raise ValueError("MONSTER_API_KEY environment variable is not set!")
        self.headers = {
            "accept": "application/json",
            "content-type": f"multipart/form-data; boundary={self.boundary}",
            "Authorization": f"Bearer {self.auth_token}",
        }
        self.base_url = 'https://api.monsterapi.ai/v1'
        self.models_to_data_model = MODELS_TO_DATAMODEL
        self.mock = os.environ.get('MOCK_Runner', "False").lower() == "true"
    def get_response(self,
                     model: Literal['falcon-7b-instruct', 'falcon-40b-instruct', 'mpt-30B-instruct',
                                    'mpt-7b-instruct', 'openllama-13b-base', 'llama2-7b-chat',
                                    'sdxl-base', 'txt2img'],
                     data: dict):
        if model not in self.models_to_data_model:
            raise ValueError(f"Invalid model: {model}!")

        # Validate the payload against the model's data model and fill in defaults.
        data_model = self.models_to_data_model[model](**data)
        url = f"{self.base_url}/generate/{model}"
        payload = data_model.dict()
        logger.info(f"Calling Monster API with url: {url}, with payload: {payload}")

        # The multipart encoder requires string values; skip None so optional
        # fields such as `seed` are not sent as the literal string "None".
        fields = {key: str(value) for key, value in payload.items() if value is not None}
        multipart_data = MultipartEncoder(fields=fields, boundary=self.boundary)

        response = requests.post(url, headers=self.headers, data=multipart_data)
        response.raise_for_status()
        return response.json()
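    # Note (assumption inferred from how wait_and_get_result polls below):
    # generation is asynchronous, so the JSON returned by get_response is
    # expected to carry a process id (e.g. {'process_id': '...'}), which is
    # then passed to get_status / wait_and_get_result.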
    def get_status(self, process_id):
        # GET /v1/status/{process_id}
        url = f"{self.base_url}/status/{process_id}"
        response = requests.get(url, headers=self.headers)
        response.raise_for_status()
        return response.json()
    def wait_and_get_result(self, process_id, timeout=100):
        start_time = time.time()
        while True:
            elapsed_time = time.time() - start_time
            if elapsed_time >= timeout:
                raise TimeoutError(f"Process {process_id} timed out after {timeout} seconds.")

            if self.mock:
                # Short-circuit before hitting the network when mocking.
                return 100 * "Mock Output!"

            status = self.get_status(process_id)
            if status['status'].lower() == 'completed':
                return status['result']
            elif status['status'].lower() == 'failed':
                raise RuntimeError(f"Process {process_id} failed! {status}")
            else:
                logger.info(f"Process {process_id} is still running, status is {status['status']}. Waiting ...")
                time.sleep(0.01)
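
if __name__ == '__main__':
    # Minimal usage sketch, assuming MONSTER_API_KEY is set in the environment
    # and the account has access to the falcon-7b-instruct endpoint. The prompt
    # and the 'process_id' response key are illustrative assumptions.
    client = MClient()
    response = client.get_response('falcon-7b-instruct',
                                   {'prompt': 'Write a haiku about the sea.'})
    result = client.wait_and_get_result(response['process_id'])
    print(result)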