# MonsterAPIClient.py

"""
Monster API Python client to connect to LLM models on monsterapi.

Base URL: https://api.monsterapi.ai/v1/generate/{model}

Available models:
-----------------
LLMs:
    1. falcon-7b-instruct
    2. falcon-40b-instruct
    3. mpt-30B-instruct
    4. mpt-7b-instruct
    5. openllama-13b-base
    6. llama2-7b-chat

Text to Image:
    1. stable-diffusion v1.5 (endpoint: txt2img)
    2. stable-diffusion XL v1.0 (endpoint: sdxl-base)
"""
import os
import time
import logging

import requests
from requests_toolbelt.multipart.encoder import MultipartEncoder

from typing import Optional, Literal
from pydantic import BaseModel, Field

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class LLMInputModel1(BaseModel):
    """
    Supports the following models: falcon-7b-instruct, falcon-40b-instruct, openllama-13b-base, llama2-7b-chat

    prompt (str): Textual instruction for the model to produce an output. Required.
    top_k (int): Top-k sampling helps improve quality by removing the tail and making it less likely to go off topic. Optional (default: 40).
    top_p (float): Top-p sampling helps generate more diverse and creative text by considering a broader range of tokens. Allowed range: 0-1. Optional (default: 0.9).
    temp (float): The temperature influences the randomness of the next token predictions. Allowed range: 0-1. Optional (default: 0.98).
    max_length (int): The maximum length of the generated text. Optional (default: 256).
    repetition_penalty (float): Penalty used to discourage the repetition of tokens in the output. Optional (default: 1.2).
    beam_size (int): The beam size for beam search. A larger beam size gives better-quality output at the cost of slower generation. Optional (default: 1).
    """
    prompt: str
    top_k: int = 40
    top_p: float = Field(0.9, ge=0., le=1.)
    temp: float = Field(0.98, ge=0., le=1.)
    max_length: int = 256
    repetition_penalty: float = 1.2
    beam_size: int = 1
class LLMInputModel2(BaseModel):
    """
    Supports the following models: mpt-30B-instruct, mpt-7b-instruct

    prompt (str): Textual instruction for the model to produce an output. Required.
    top_k (int): Top-k sampling helps improve quality by removing the tail and making it less likely to go off topic. Optional (default: 40).
    top_p (float): Top-p sampling helps generate more diverse and creative text by considering a broader range of tokens. Allowed range: 0-1. Optional (default: 0.9).
    temp (float): Controls the randomness of the model's output; the higher the temperature, the more random the output. Optional (default: 0.98).
    max_length (int): Maximum length of the generated output. Optional (default: 256).
    """
    prompt: str
    top_k: int = 40
    top_p: float = Field(0.9, ge=0., le=1.)
    temp: float = Field(0.98, ge=0., le=1.)
    max_length: int = 256
class SDInputModel(BaseModel):
    """
    Supports the following models: txt2img, sdxl-base

    prompt (str): Your input text prompt. Required.
    negprompt (str): Negative text prompt. Optional.
    samples (int): Number of images to be generated. Allowed range: 1-4. Optional (default: 1).
    steps (int): Sampling steps per image. Allowed range: 30-500. Optional (default: 30).
    aspect_ratio (str): Allowed values: square, landscape, portrait. Optional (default: square).
    guidance_scale (float): Prompt guidance scale. Optional (default: 7.5).
    seed (int): Random number used to initialize the image generation. Optional (default: random).
    """
    prompt: str
    negprompt: Optional[str] = ""
    samples: Optional[int] = Field(1, ge=1, le=4)
    steps: Optional[int] = Field(30, ge=30, le=500)
    aspect_ratio: Optional[Literal['square', 'landscape', 'portrait']] = 'square'
    guidance_scale: Optional[float] = 7.5
    seed: Optional[int] = None
MODELS_TO_DATAMODEL = {
    'falcon-7b-instruct': LLMInputModel1,
    'falcon-40b-instruct': LLMInputModel1,
    'mpt-30B-instruct': LLMInputModel2,
    'mpt-7b-instruct': LLMInputModel2,
    'openllama-13b-base': LLMInputModel1,
    'llama2-7b-chat': LLMInputModel1,
    'sdxl-base': SDInputModel,
    'txt2img': SDInputModel,
}
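
# A minimal sketch of how the data models behave (prompt value is hypothetical):
# pydantic validates the payload and fills in defaults, so a partial dict suffices:
#   LLMInputModel1(prompt="Write a haiku about the sea").dict()
#   -> {'prompt': 'Write a haiku about the sea', 'top_k': 40, 'top_p': 0.9,
#       'temp': 0.98, 'max_length': 256, 'repetition_penalty': 1.2, 'beam_size': 1}
# Out-of-range values (e.g. top_p=1.5) raise a pydantic ValidationError.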
class MClient:
    def __init__(self):
        self.boundary = '---011000010111000001101001'
        self.auth_token = os.environ.get('MONSTER_API_KEY')
        if not self.auth_token:
            # Fail early with a clear message instead of a TypeError below.
            raise ValueError("MONSTER_API_KEY environment variable is not set!")
        self.headers = {
            "accept": "application/json",
            "content-type": f"multipart/form-data; boundary={self.boundary}",
            "Authorization": f"Bearer {self.auth_token}",
        }
        self.base_url = 'https://api.monsterapi.ai/v1'
        self.models_to_data_model = MODELS_TO_DATAMODEL
        self.mock = os.environ.get('MOCK_Runner', "False").lower() == "true"
    def get_response(self,
                     model: Literal['falcon-7b-instruct', 'falcon-40b-instruct', 'mpt-30B-instruct',
                                    'mpt-7b-instruct', 'openllama-13b-base', 'llama2-7b-chat',
                                    'sdxl-base', 'txt2img'],
                     data: dict):
        if model not in self.models_to_data_model:
            raise ValueError(f"Invalid model: {model}!")

        # Validate the payload against the model's data model and fill in defaults.
        data_model = self.models_to_data_model[model](**data)
        url = f"{self.base_url}/generate/{model}"
        payload = data_model.dict()
        logger.info(f"Calling Monster API with url: {url}, with payload: {payload}")

        # The multipart encoder requires string values; skip None so optional
        # fields such as `seed` are not sent as the literal string "None".
        fields = {key: str(value) for key, value in payload.items() if value is not None}
        multipart_data = MultipartEncoder(fields=fields, boundary=self.boundary)

        response = requests.post(url, headers=self.headers, data=multipart_data)
        response.raise_for_status()
        return response.json()
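    # Note (assumption inferred from how wait_and_get_result polls below):
    # generation is asynchronous, so the JSON returned by get_response is
    # expected to carry a process id (e.g. {'process_id': '...'}), which is
    # then passed to get_status / wait_and_get_result.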
    def get_status(self, process_id):
        # GET /v1/status/{process_id}
        url = f"{self.base_url}/status/{process_id}"
        response = requests.get(url, headers=self.headers)
        response.raise_for_status()
        return response.json()
    def wait_and_get_result(self, process_id, timeout=100):
        start_time = time.time()
        while True:
            elapsed_time = time.time() - start_time
            if elapsed_time >= timeout:
                raise TimeoutError(f"Process {process_id} timed out after {timeout} seconds.")

            if self.mock:
                # Short-circuit before hitting the network when mocking.
                return 100 * "Mock Output!"

            status = self.get_status(process_id)
            if status['status'].lower() == 'completed':
                return status['result']
            elif status['status'].lower() == 'failed':
                raise RuntimeError(f"Process {process_id} failed! {status}")
            else:
                logger.info(f"Process {process_id} is still running, status is {status['status']}. Waiting ...")
                time.sleep(0.01)
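
if __name__ == '__main__':
    # Minimal usage sketch, assuming MONSTER_API_KEY is set in the environment
    # and the account has access to the falcon-7b-instruct endpoint. The prompt
    # and the 'process_id' response key are illustrative assumptions.
    client = MClient()
    response = client.get_response('falcon-7b-instruct',
                                   {'prompt': 'Write a haiku about the sea.'})
    result = client.wait_and_get_result(response['process_id'])
    print(result)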