|
from ctransformers import AutoConfig, AutoModelForCausalLM |
|
|
|
from modules import shared |
|
from modules.callbacks import Iteratorize |
|
from modules.logging_colors import logger |
|
|
|
|
|
class CtransformersModel:
    """Wrapper around a ctransformers ``AutoModelForCausalLM``.

    Instances are normally created through :meth:`from_pretrained`, which
    stores the loaded model on ``self.model``. The remaining methods
    delegate tokenization and text generation to that model.
    """

    def __init__(self):
        # ``self.model`` is assigned by ``from_pretrained``; nothing to do here.
        pass

    @classmethod
    def from_pretrained(cls, path):
        """Load a ctransformers model configured from ``shared.args``.

        Args:
            path: Location of the model. When the model type is "auto",
                ``path.is_file()`` is called on it, so a
                ``pathlib.Path``-like object is expected.

        Returns:
            A 2-tuple containing the same wrapper instance twice.
            NOTE(review): presumably consumed as ``(model, tokenizer)``
            by the caller, since this class also exposes
            ``encode``/``decode`` -- confirm against the loader.
        """
        result = cls()
        args = shared.args

        config = AutoConfig.from_pretrained(
            str(path),
            # A thread count of 0 is passed on as -1, which is the default
            # value ctransformers itself uses for ``threads``.
            threads=args.threads if args.threads != 0 else -1,
            gpu_layers=args.n_gpu_layers,
            batch_size=args.n_batch,
            context_length=args.n_ctx,
            # Always stream: ``generate`` iterates over the tokens returned
            # by the model call.
            stream=True,
            mmap=not args.no_mmap,
            mlock=args.mlock,
        )

        # With an "auto" model type the model's directory is handed over and
        # no explicit type is given; otherwise the path is used as-is
        # together with the requested model type.
        auto_type = result.model_type_is_auto()
        model_location = result.model_dir(path) if auto_type else path
        result.model = AutoModelForCausalLM.from_pretrained(
            str(model_location),
            model_type=None if auto_type else args.model_type,
            config=config,
        )

        logger.info(f'Using ctransformers model_type: {result.model.model_type} for {result.model.model_path}')
        return result, result

    def model_type_is_auto(self) -> bool:
        """Return True when ``shared.args.model_type`` requests auto-detection.

        ``None`` and the strings ``"Auto"`` and ``"None"`` all count as auto.
        """
        return shared.args.model_type in (None, "Auto", "None")

    def model_dir(self, path):
        """Return the directory for ``path``.

        If ``path`` is an existing file its parent directory is returned;
        otherwise ``path`` itself is returned unchanged.
        """
        return path.parent if path.is_file() else path

    def encode(self, string, **kwargs):
        """Tokenize ``string`` with the wrapped model.

        Extra keyword arguments are accepted but ignored.
        """
        return self.model.tokenize(string)

    def decode(self, ids):
        """Convert token ``ids`` back to text with the wrapped model."""
        return self.model.detokenize(ids)

    def generate(self, prompt, state, callback=None) -> str:
        """Generate a completion for ``prompt`` and return it as one string.

        Args:
            prompt: The prompt as ``str`` (including ``str`` subclasses), or
                an object with a ``decode()`` method such as ``bytes``.
            state: Mapping that provides ``max_new_tokens``,
                ``temperature``, ``top_p``, ``top_k``,
                ``repetition_penalty``, ``repetition_penalty_range`` and
                ``seed``.
            callback: Optional callable invoked with every token as soon as
                it is produced.

        Returns:
            The concatenation of all generated tokens.
        """
        # isinstance (not an exact type check) so that str subclasses are
        # passed through instead of hitting a non-existent ``.decode()``.
        if not isinstance(prompt, str):
            prompt = prompt.decode()

        token_stream = self.model(
            prompt=prompt,
            max_new_tokens=state['max_new_tokens'],
            temperature=state['temperature'],
            top_p=state['top_p'],
            top_k=state['top_k'],
            repetition_penalty=state['repetition_penalty'],
            last_n_tokens=state['repetition_penalty_range'],
            # The seed is coerced to int before being handed to the model.
            seed=int(state['seed']),
        )

        pieces = []
        for token in token_stream:
            if callback:
                callback(token)

            pieces.append(token)

        return "".join(pieces)

    def generate_with_streaming(self, *args, **kwargs):
        """Yield the cumulative reply after each newly generated token.

        All arguments are forwarded to :meth:`generate` through
        ``Iteratorize``.
        NOTE(review): ``Iteratorize`` lives in ``modules.callbacks``; it
        presumably turns the callback-driven ``generate`` into an iterator
        of tokens -- confirm there.
        """
        with Iteratorize(self.generate, args, kwargs, callback=None) as generator:
            reply = ''
            for token in generator:
                reply += token
                yield reply
|
|