# NOTE: page-header residue from the hosting site ("Spaces: Runtime error"); not part of the module.
from ctransformers import AutoConfig, AutoModelForCausalLM | |
from modules import shared | |
from modules.callbacks import Iteratorize | |
from modules.logging_colors import logger | |
class CtransformersModel:
    """Thin adapter around a ctransformers causal language model.

    The instance exposes ``encode``/``decode`` as well as ``generate``, and
    ``from_pretrained`` returns the same instance twice, so one object can be
    used wherever a separate model and tokenizer are expected.
    """

    def __init__(self):
        # ``self.model`` is attached by ``from_pretrained``; nothing to do here.
        pass

    @classmethod
    def from_pretrained(cls, path):
        """Load a ctransformers model from *path* using the shared CLI args.

        Args:
            path: Path-like location of the model file or its directory.
                It must support ``is_file()`` and ``parent`` when the model
                type is auto-detected.

        Returns:
            A 2-tuple ``(instance, instance)`` holding the same object twice
            (presumably consumed as a ``(model, tokenizer)`` pair by the
            caller; verify against the call site).
        """
        result = cls()

        config = AutoConfig.from_pretrained(
            str(path),
            # A thread count of 0 in the args is mapped to -1 for ctransformers.
            threads=shared.args.threads if shared.args.threads != 0 else -1,
            gpu_layers=shared.args.n_gpu_layers,
            batch_size=shared.args.n_batch,
            context_length=shared.args.n_ctx,
            stream=True,
            mmap=not shared.args.no_mmap,
            mlock=shared.args.mlock,
        )

        auto_type = result.model_type_is_auto()
        result.model = AutoModelForCausalLM.from_pretrained(
            # With auto-detection the containing directory is handed over
            # instead of the file itself; with an explicit type the path is
            # passed through unchanged.
            str(result.model_dir(path) if auto_type else path),
            model_type=None if auto_type else shared.args.model_type,
            config=config,
        )

        logger.info(f'Using ctransformers model_type: {result.model.model_type} for {result.model.model_path}')
        return result, result

    def model_type_is_auto(self):
        """Return True when no explicit model type was requested.

        ``None`` and the strings ``"Auto"`` and ``"None"`` all count as
        "auto-detect".
        """
        return shared.args.model_type in (None, "Auto", "None")

    def model_dir(self, path):
        """Return the directory for *path*: its parent if it is a file, else *path*."""
        if path.is_file():
            return path.parent

        return path

    def encode(self, string, **kwargs):
        """Tokenize *string* with the underlying model; extra kwargs are ignored."""
        return self.model.tokenize(string)

    def decode(self, ids):
        """Convert token *ids* back to text with the underlying model."""
        return self.model.detokenize(ids)

    def generate(self, prompt, state, callback=None):
        """Generate a completion for *prompt* and return it as one string.

        Args:
            prompt: The prompt as ``str``, or an object with ``decode()``
                (e.g. ``bytes``) that is decoded first.
            state: Mapping with the sampling settings ``max_new_tokens``,
                ``temperature``, ``top_p``, ``top_k``, ``repetition_penalty``,
                ``repetition_penalty_range`` and ``seed``.
            callback: Optional callable invoked with every generated token
                as soon as it is produced.

        Returns:
            The concatenation of all generated tokens.
        """
        if not isinstance(prompt, str):
            prompt = prompt.decode()

        # ctransformers uses -1 for a random seed, so the seed is forwarded
        # unchanged apart from the int() coercion.
        generator = self.model(
            prompt=prompt,
            max_new_tokens=state['max_new_tokens'],
            temperature=state['temperature'],
            top_p=state['top_p'],
            top_k=state['top_k'],
            repetition_penalty=state['repetition_penalty'],
            last_n_tokens=state['repetition_penalty_range'],
            seed=int(state['seed']),
        )

        pieces = []
        for token in generator:
            if callback:
                callback(token)

            pieces.append(token)

        return "".join(pieces)

    def generate_with_streaming(self, *args, **kwargs):
        """Yield the cumulative reply after each newly generated token.

        Arguments are forwarded to ``generate`` through ``Iteratorize``, which
        turns its callback-based interface into an iterator.
        """
        with Iteratorize(self.generate, args, kwargs, callback=None) as generator:
            reply = ''
            for token in generator:
                reply += token
                yield reply