rodrigomasini committed on
Commit
ba553c4
1 Parent(s): 1cb7677

Upload 12 files

modules/callbacks.py ADDED
@@ -0,0 +1,94 @@
+ import gc
+ import traceback
+ from queue import Queue
+ from threading import Thread
+
+ import torch
+ import transformers
+
+ import modules.shared as shared
+
+
+ class _StopEverythingStoppingCriteria(transformers.StoppingCriteria):
+     def __init__(self):
+         transformers.StoppingCriteria.__init__(self)
+
+     def __call__(self, input_ids: torch.LongTensor, _scores: torch.FloatTensor) -> bool:
+         return shared.stop_everything
+
+
+ class Stream(transformers.StoppingCriteria):
+     def __init__(self, callback_func=None):
+         self.callback_func = callback_func
+
+     def __call__(self, input_ids, scores) -> bool:
+         if self.callback_func is not None:
+             self.callback_func(input_ids[0])
+         return False
+
+
+ class Iteratorize:
+
+     """
+     Transforms a function that takes a callback
+     into a lazy iterator (generator).
+
+     Adapted from: https://stackoverflow.com/a/9969000
+     """
+
+     def __init__(self, func, args=None, kwargs=None, callback=None):
+         self.mfunc = func
+         self.c_callback = callback
+         self.q = Queue()
+         self.sentinel = object()
+         self.args = args or []
+         self.kwargs = kwargs or {}
+         self.stop_now = False
+
+         def _callback(val):
+             if self.stop_now or shared.stop_everything:
+                 raise ValueError
+             self.q.put(val)
+
+         def gentask():
+             try:
+                 ret = self.mfunc(callback=_callback, *self.args, **self.kwargs)
+             except ValueError:
+                 pass
+             except:
+                 traceback.print_exc()
+                 pass
+
+             clear_torch_cache()
+             self.q.put(self.sentinel)
+             if self.c_callback:
+                 self.c_callback(ret)
+
+         self.thread = Thread(target=gentask)
+         self.thread.start()
+
+     def __iter__(self):
+         return self
+
+     def __next__(self):
+         obj = self.q.get(True, None)
+         if obj is self.sentinel:
+             raise StopIteration
+         else:
+             return obj
+
+     def __del__(self):
+         clear_torch_cache()
+
+     def __enter__(self):
+         return self
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         self.stop_now = True
+         clear_torch_cache()
+
+
+ def clear_torch_cache():
+     gc.collect()
+     if not shared.args.cpu:
+         torch.cuda.empty_cache()
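For context, a minimal usage sketch of how Stream and Iteratorize are typically combined to turn a blocking model.generate() call into a token stream. The generate_with_callback/generate_with_streaming helpers below are illustrative stand-ins and are not part of this upload.

# Hypothetical usage sketch (not part of the uploaded files): stream tokens out of
# a blocking generate() call by routing every generation step through a callback.
import torch
import transformers

import modules.shared as shared
from modules.callbacks import Iteratorize, Stream


def generate_with_callback(callback=None, **kwargs):
    # Stream calls `callback` with the tokens produced so far on every generation step.
    kwargs['stopping_criteria'] = transformers.StoppingCriteriaList([Stream(callback_func=callback)])
    with torch.no_grad():
        shared.model.generate(**kwargs)


def generate_with_streaming(**kwargs):
    # Iteratorize runs generate_with_callback in a thread and yields each callback value.
    return Iteratorize(generate_with_callback, kwargs=kwargs)

# for output_ids in generate_with_streaming(inputs=input_ids, max_new_tokens=200):
#     print(shared.tokenizer.decode(output_ids, skip_special_tokens=True))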
modules/chat.py ADDED
@@ -0,0 +1,664 @@
+ import base64
+ import copy
+ import functools
+ import json
+ import re
+ from datetime import datetime
+ from pathlib import Path
+
+ import gradio as gr
+ import yaml
+ from PIL import Image
+
+ import modules.shared as shared
+ from modules.extensions import apply_extensions
+ from modules.html_generator import chat_html_wrapper, make_thumbnail
+ from modules.logging_colors import logger
+ from modules.text_generation import (
+     generate_reply,
+     get_encoded_length,
+     get_max_prompt_length
+ )
+ from modules.utils import (
+     delete_file,
+     get_available_characters,
+     replace_all,
+     save_file
+ )
+
+
+ def str_presenter(dumper, data):
+     """
+     Copied from https://github.com/yaml/pyyaml/issues/240
+     Makes pyyaml output prettier multiline strings.
+     """
+
+     if data.count('\n') > 0:
+         return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='|')
+
+     return dumper.represent_scalar('tag:yaml.org,2002:str', data)
+
+
+ yaml.add_representer(str, str_presenter)
+ yaml.representer.SafeRepresenter.add_representer(str, str_presenter)
+
+
+ def get_turn_substrings(state, instruct=False):
+     if instruct:
+         if 'turn_template' not in state or state['turn_template'] == '':
+             template = '<|user|>\n<|user-message|>\n<|bot|>\n<|bot-message|>\n'
+         else:
+             template = state['turn_template'].replace(r'\n', '\n')
+     else:
+         template = '<|user|>: <|user-message|>\n<|bot|>: <|bot-message|>\n'
+
+     replacements = {
+         '<|user|>': state['name1_instruct' if instruct else 'name1'].strip(),
+         '<|bot|>': state['name2_instruct' if instruct else 'name2'].strip(),
+     }
+
+     output = {
+         'user_turn': template.split('<|bot|>')[0],
+         'bot_turn': '<|bot|>' + template.split('<|bot|>')[1],
+         'user_turn_stripped': template.split('<|bot|>')[0].split('<|user-message|>')[0],
+         'bot_turn_stripped': '<|bot|>' + template.split('<|bot|>')[1].split('<|bot-message|>')[0],
+     }
+
+     for k in output:
+         output[k] = replace_all(output[k], replacements)
+
+     return output
+
+
+ def generate_chat_prompt(user_input, state, **kwargs):
+     impersonate = kwargs.get('impersonate', False)
+     _continue = kwargs.get('_continue', False)
+     also_return_rows = kwargs.get('also_return_rows', False)
+     history = kwargs.get('history', state['history'])['internal']
+     is_instruct = state['mode'] == 'instruct'
+
+     # Find the maximum prompt size
+     max_length = get_max_prompt_length(state)
+     all_substrings = {
+         'chat': get_turn_substrings(state, instruct=False),
+         'instruct': get_turn_substrings(state, instruct=True)
+     }
+
+     substrings = all_substrings['instruct' if is_instruct else 'chat']
+
+     # Create the template for "chat-instruct" mode
+     if state['mode'] == 'chat-instruct':
+         wrapper = ''
+         command = state['chat-instruct_command'].replace('<|character|>', state['name2'] if not impersonate else state['name1'])
+         wrapper += state['context_instruct']
+         wrapper += all_substrings['instruct']['user_turn'].replace('<|user-message|>', command)
+         wrapper += all_substrings['instruct']['bot_turn_stripped']
+         if impersonate:
+             wrapper += substrings['user_turn_stripped'].rstrip(' ')
+         elif _continue:
+             wrapper += apply_extensions('bot_prefix', substrings['bot_turn_stripped'], state)
+             wrapper += history[-1][1]
+         else:
+             wrapper += apply_extensions('bot_prefix', substrings['bot_turn_stripped'].rstrip(' '), state)
+     else:
+         wrapper = '<|prompt|>'
+
+     if is_instruct:
+         context = state['context_instruct']
+     else:
+         context = replace_character_names(
+             f"{state['context'].strip()}\n",
+             state['name1'],
+             state['name2']
+         )
+
+     # Build the prompt
+     rows = [context]
+     min_rows = 3
+     i = len(history) - 1
+     while i >= 0 and get_encoded_length(wrapper.replace('<|prompt|>', ''.join(rows))) < max_length:
+         if _continue and i == len(history) - 1:
+             if state['mode'] != 'chat-instruct':
+                 rows.insert(1, substrings['bot_turn_stripped'] + history[i][1].strip())
+         else:
+             rows.insert(1, substrings['bot_turn'].replace('<|bot-message|>', history[i][1].strip()))
+
+         string = history[i][0]
+         if string not in ['', '<|BEGIN-VISIBLE-CHAT|>']:
+             rows.insert(1, replace_all(substrings['user_turn'], {'<|user-message|>': string.strip(), '<|round|>': str(i)}))
+
+         i -= 1
+
+     if impersonate:
+         if state['mode'] == 'chat-instruct':
+             min_rows = 1
+         else:
+             min_rows = 2
+             rows.append(substrings['user_turn_stripped'].rstrip(' '))
+     elif not _continue:
+         # Add the user message
+         if len(user_input) > 0:
+             rows.append(replace_all(substrings['user_turn'], {'<|user-message|>': user_input.strip(), '<|round|>': str(len(history))}))
+
+         # Add the character prefix
+         if state['mode'] != 'chat-instruct':
+             rows.append(apply_extensions('bot_prefix', substrings['bot_turn_stripped'].rstrip(' '), state))
+
+     while len(rows) > min_rows and get_encoded_length(wrapper.replace('<|prompt|>', ''.join(rows))) >= max_length:
+         rows.pop(1)
+
+     prompt = wrapper.replace('<|prompt|>', ''.join(rows))
+     if also_return_rows:
+         return prompt, rows
+     else:
+         return prompt
+
+
+ def get_stopping_strings(state):
+     stopping_strings = []
+     if state['mode'] in ['instruct', 'chat-instruct']:
+         stopping_strings += [
+             state['turn_template'].split('<|user-message|>')[1].split('<|bot|>')[0] + '<|bot|>',
+             state['turn_template'].split('<|bot-message|>')[1] + '<|user|>'
+         ]
+
+         replacements = {
+             '<|user|>': state['name1_instruct'],
+             '<|bot|>': state['name2_instruct']
+         }
+
+         for i in range(len(stopping_strings)):
+             stopping_strings[i] = replace_all(stopping_strings[i], replacements).rstrip(' ').replace(r'\n', '\n')
+
+     if state['mode'] in ['chat', 'chat-instruct']:
+         stopping_strings += [
+             f"\n{state['name1']}:",
+             f"\n{state['name2']}:"
+         ]
+
+     if state['stop_at_newline']:
+         stopping_strings.append("\n")
+
+     return stopping_strings
+
+
+ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_message=True):
+     history = state['history']
+     output = copy.deepcopy(history)
+     output = apply_extensions('history', output)
+     state = apply_extensions('state', state)
+     if shared.model_name == 'None' or shared.model is None:
+         logger.error("No model is loaded! Select one in the Model tab.")
+         yield output
+         return
+
+     # Defining some variables
+     just_started = True
+     visible_text = None
+     stopping_strings = get_stopping_strings(state)
+     is_stream = state['stream']
+
+     # Preparing the input
+     if not any((regenerate, _continue)):
+         visible_text = text
+         text, visible_text = apply_extensions('chat_input', text, visible_text, state)
+         text = apply_extensions('input', text, state)
+
+         # *Is typing...*
+         if loading_message:
+             yield {'visible': output['visible'] + [[visible_text, shared.processing_message]], 'internal': output['internal']}
+     else:
+         text, visible_text = output['internal'][-1][0], output['visible'][-1][0]
+         if regenerate:
+             output['visible'].pop()
+             output['internal'].pop()
+             # *Is typing...*
+             if loading_message:
+                 yield {'visible': output['visible'] + [[visible_text, shared.processing_message]], 'internal': output['internal']}
+         elif _continue:
+             last_reply = [output['internal'][-1][1], output['visible'][-1][1]]
+             if loading_message:
+                 yield {'visible': output['visible'][:-1] + [[visible_text, last_reply[1] + '...']], 'internal': output['internal']}
+
+     # Generating the prompt
+     kwargs = {
+         '_continue': _continue,
+         'history': output,
+     }
+
+     prompt = apply_extensions('custom_generate_chat_prompt', text, state, **kwargs)
+     if prompt is None:
+         prompt = generate_chat_prompt(text, state, **kwargs)
+
+     # Generate
+     cumulative_reply = ''
+     for i in range(state['chat_generation_attempts']):
+         reply = None
+         for j, reply in enumerate(generate_reply(prompt + cumulative_reply, state, stopping_strings=stopping_strings, is_chat=True)):
+             reply = cumulative_reply + reply
+
+             # Extract the reply
+             visible_reply = re.sub("(<USER>|<user>|{{user}})", state['name1'], reply)
+
+             # We need this global variable to handle the Stop event,
+             # otherwise gradio gets confused
+             if shared.stop_everything:
+                 output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state)
+                 yield output
+                 return
+
+             if just_started:
+                 just_started = False
+                 if not _continue:
+                     output['internal'].append(['', ''])
+                     output['visible'].append(['', ''])
+
+             if _continue:
+                 output['internal'][-1] = [text, last_reply[0] + reply]
+                 output['visible'][-1] = [visible_text, last_reply[1] + visible_reply]
+                 if is_stream:
+                     yield output
+             elif not (j == 0 and visible_reply.strip() == ''):
+                 output['internal'][-1] = [text, reply.lstrip(' ')]
+                 output['visible'][-1] = [visible_text, visible_reply.lstrip(' ')]
+                 if is_stream:
+                     yield output
+
+         if reply in [None, cumulative_reply]:
+             break
+         else:
+             cumulative_reply = reply
+
+     output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state)
+     yield output
+
+
+ def impersonate_wrapper(text, start_with, state):
+     if shared.model_name == 'None' or shared.model is None:
+         logger.error("No model is loaded! Select one in the Model tab.")
+         yield ''
+         return
+
+     # Defining some variables
+     cumulative_reply = ''
+     prompt = generate_chat_prompt('', state, impersonate=True)
+     stopping_strings = get_stopping_strings(state)
+
+     yield text + '...'
+     cumulative_reply = text
+     for i in range(state['chat_generation_attempts']):
+         reply = None
+         for reply in generate_reply(prompt + cumulative_reply, state, stopping_strings=stopping_strings, is_chat=True):
+             reply = cumulative_reply + reply
+             yield reply.lstrip(' ')
+             if shared.stop_everything:
+                 return
+
+         if reply in [None, cumulative_reply]:
+             break
+         else:
+             cumulative_reply = reply
+
+     yield cumulative_reply.lstrip(' ')
+
+
+ def generate_chat_reply(text, state, regenerate=False, _continue=False, loading_message=True):
+     history = state['history']
+     if regenerate or _continue:
+         text = ''
+         if (len(history['visible']) == 1 and not history['visible'][0][0]) or len(history['internal']) == 0:
+             yield history
+             return
+
+     for history in chatbot_wrapper(text, state, regenerate=regenerate, _continue=_continue, loading_message=loading_message):
+         yield history
+
+
+ # Same as above but returns HTML for the UI
+ def generate_chat_reply_wrapper(text, start_with, state, regenerate=False, _continue=False):
+     if start_with != '' and not _continue:
+         if regenerate:
+             text, state['history'] = remove_last_message(state['history'])
+             regenerate = False
+
+         _continue = True
+         send_dummy_message(text, state)
+         send_dummy_reply(start_with, state)
+
+     for i, history in enumerate(generate_chat_reply(text, state, regenerate, _continue, loading_message=True)):
+         yield chat_html_wrapper(history, state['name1'], state['name2'], state['mode'], state['chat_style']), history
+
+
+ def remove_last_message(history):
+     if len(history['visible']) > 0 and history['internal'][-1][0] != '<|BEGIN-VISIBLE-CHAT|>':
+         last = history['visible'].pop()
+         history['internal'].pop()
+     else:
+         last = ['', '']
+
+     return last[0], history
+
+
+ def send_last_reply_to_input(history):
+     if len(history['internal']) > 0:
+         return history['internal'][-1][1]
+     else:
+         return ''
+
+
+ def replace_last_reply(text, state):
+     history = state['history']
+     if len(history['visible']) > 0:
+         history['visible'][-1][1] = text
+         history['internal'][-1][1] = apply_extensions('input', text, state)
+
+     return history
+
+
+ def send_dummy_message(text, state):
+     history = state['history']
+     history['visible'].append([text, ''])
+     history['internal'].append([apply_extensions('input', text, state), ''])
+     return history
+
+
+ def send_dummy_reply(text, state):
+     history = state['history']
+     if len(history['visible']) > 0 and not history['visible'][-1][1] == '':
+         history['visible'].append(['', ''])
+         history['internal'].append(['', ''])
+
+     history['visible'][-1][1] = text
+     history['internal'][-1][1] = apply_extensions('input', text, state)
+     return history
+
+
+ def clear_chat_log(state):
+     greeting = replace_character_names(state['greeting'], state['name1'], state['name2'])
+     mode = state['mode']
+     history = state['history']
+
+     history['visible'] = []
+     history['internal'] = []
+     if mode != 'instruct':
+         if greeting != '':
+             history['internal'] += [['<|BEGIN-VISIBLE-CHAT|>', greeting]]
+             history['visible'] += [['', apply_extensions('output', greeting, state)]]
+
+     return history
+
+
+ def redraw_html(history, name1, name2, mode, style, reset_cache=False):
+     return chat_html_wrapper(history, name1, name2, mode, style, reset_cache=reset_cache)
+
+
+ def save_history(history, path=None):
+     p = path or Path('logs/exported_history.json')
+     with open(p, 'w', encoding='utf-8') as f:
+         f.write(json.dumps(history, indent=4))
+
+     return p
+
+
+ def load_history(file, history):
+     try:
+         file = file.decode('utf-8')
+         j = json.loads(file)
+         if 'internal' in j and 'visible' in j:
+             return j
+         else:
+             return history
+     except:
+         return history
+
+
+ def save_history_at_user_request(history, character, mode):
+     def make_timestamp_path(character=None):
+         return f"logs/{character or ''}{'_' if character else ''}{datetime.now().strftime('%Y%m%d-%H%M%S')}.json"
+
+     path = None
+     if mode in ['chat', 'chat-instruct'] and character not in ['', 'None', None]:
+         path = make_timestamp_path(character)
+     else:
+         # Try to use mode as the file name, otherwise just use the timestamp
+         try:
+             path = make_timestamp_path(mode.capitalize())
+         except:
+             path = make_timestamp_path()
+
+     return save_history(history, path)
+
+
+ def save_persistent_history(history, character, mode):
+     if mode in ['chat', 'chat-instruct'] and character not in ['', 'None', None] and not shared.args.multi_user:
+         save_history(history, path=Path(f'logs/{character}_persistent.json'))
+
+
+ def load_persistent_history(state):
+     if state['mode'] == 'instruct':
+         return state['history']
+
+     character = state['character_menu']
+     greeting = replace_character_names(state['greeting'], state['name1'], state['name2'])
+     p = Path(f'logs/{character}_persistent.json')
+     if not shared.args.multi_user and character not in ['None', '', None] and p.exists():
+         f = json.loads(open(p, 'rb').read())
+         if 'internal' in f and 'visible' in f:
+             history = f
+         else:
+             history = {'internal': [], 'visible': []}
+             history['internal'] = f['data']
+             history['visible'] = f['data_visible']
+     else:
+         history = {'internal': [], 'visible': []}
+         if greeting != "":
+             history['internal'] += [['<|BEGIN-VISIBLE-CHAT|>', greeting]]
+             history['visible'] += [['', apply_extensions('output', greeting, state)]]
+
+     return history
+
+
+ def replace_character_names(text, name1, name2):
+     text = text.replace('{{user}}', name1).replace('{{char}}', name2)
+     return text.replace('<USER>', name1).replace('<BOT>', name2)
+
+
+ def generate_pfp_cache(character):
+     cache_folder = Path("cache")
+     if not cache_folder.exists():
+         cache_folder.mkdir()
+
+     for path in [Path(f"characters/{character}.{extension}") for extension in ['png', 'jpg', 'jpeg']]:
+         if path.exists():
+             img = make_thumbnail(Image.open(path))
+             img.save(Path('cache/pfp_character.png'), format='PNG')
+             return img
+
+     return None
+
+
+ def load_character(character, name1, name2, instruct=False):
+     context = greeting = turn_template = ""
+     greeting_field = 'greeting'
+     picture = None
+
+     # Deleting the profile picture cache, if any
+     if Path("cache/pfp_character.png").exists():
+         Path("cache/pfp_character.png").unlink()
+
+     if character not in ['None', '', None]:
+         folder = 'characters' if not instruct else 'characters/instruction-following'
+         picture = generate_pfp_cache(character)
+         filepath = None
+         for extension in ["yml", "yaml", "json"]:
+             filepath = Path(f'{folder}/{character}.{extension}')
+             if filepath.exists():
+                 break
+
+         if filepath is None:
+             logger.error(f"Could not find character file for {character} in {folder} folder. Please check your spelling.")
+             return name1, name2, picture, greeting, context, turn_template.replace("\n", r"\n")
+
+         file_contents = open(filepath, 'r', encoding='utf-8').read()
+         data = json.loads(file_contents) if extension == "json" else yaml.safe_load(file_contents)
+
+         # Finding the bot's name
+         for k in ['name', 'bot', '<|bot|>', 'char_name']:
+             if k in data and data[k] != '':
+                 name2 = data[k]
+                 break
+
+         # Find the user name (if any)
+         for k in ['your_name', 'user', '<|user|>']:
+             if k in data and data[k] != '':
+                 name1 = data[k]
+                 break
+
+         if 'context' in data:
+             context = data['context']
+             if not instruct:
+                 context = context.strip() + '\n'
+         elif "char_persona" in data:
+             context = build_pygmalion_style_context(data)
+             greeting_field = 'char_greeting'
+
+         if 'example_dialogue' in data:
+             context += f"{data['example_dialogue'].strip()}\n"
+
+         if greeting_field in data:
+             greeting = data[greeting_field]
+
+         if 'turn_template' in data:
+             turn_template = data['turn_template']
+
+     else:
+         context = shared.settings['context']
+         name2 = shared.settings['name2']
+         greeting = shared.settings['greeting']
+         turn_template = shared.settings['turn_template']
+
+     return name1, name2, picture, greeting, context, turn_template.replace("\n", r"\n")
+
+
+ @functools.cache
+ def load_character_memoized(character, name1, name2, instruct=False):
+     return load_character(character, name1, name2, instruct=instruct)
+
+
+ def upload_character(file, img, tavern=False):
+     decoded_file = file if type(file) == str else file.decode('utf-8')
+     try:
+         data = json.loads(decoded_file)
+     except:
+         data = yaml.safe_load(decoded_file)
+
+     if 'char_name' in data:
+         name = data['char_name']
+         greeting = data['char_greeting']
+         context = build_pygmalion_style_context(data)
+         yaml_data = generate_character_yaml(name, greeting, context)
+     else:
+         name = data['name']
+         yaml_data = generate_character_yaml(data['name'], data['greeting'], data['context'])
+
+     outfile_name = name
+     i = 1
+     while Path(f'characters/{outfile_name}.yaml').exists():
+         outfile_name = f'{name}_{i:03d}'
+         i += 1
+
+     with open(Path(f'characters/{outfile_name}.yaml'), 'w', encoding='utf-8') as f:
+         f.write(yaml_data)
+
+     if img is not None:
+         img.save(Path(f'characters/{outfile_name}.png'))
+
+     logger.info(f'New character saved to "characters/{outfile_name}.yaml".')
+     return gr.update(value=outfile_name, choices=get_available_characters())
+
+
+ def build_pygmalion_style_context(data):
+     context = ""
+     if 'char_persona' in data and data['char_persona'] != '':
+         context += f"{data['char_name']}'s Persona: {data['char_persona']}\n"
+
+     if 'world_scenario' in data and data['world_scenario'] != '':
+         context += f"Scenario: {data['world_scenario']}\n"
+
+     context = f"{context.strip()}\n"
+     return context
+
+
+ def upload_tavern_character(img, _json):
+     _json = {'char_name': _json['name'], 'char_persona': _json['description'], 'char_greeting': _json['first_mes'], 'example_dialogue': _json['mes_example'], 'world_scenario': _json['scenario']}
+     return upload_character(json.dumps(_json), img, tavern=True)
+
+
+ def check_tavern_character(img):
+     if "chara" not in img.info:
+         return "Not a TavernAI card", None, None, gr.update(interactive=False)
+
+     decoded_string = base64.b64decode(img.info['chara']).replace(b'\\r\\n', b'\\n')
+     _json = json.loads(decoded_string)
+     if "data" in _json:
+         _json = _json["data"]
+
+     return _json['name'], _json['description'], _json, gr.update(interactive=True)
+
+
+ def upload_your_profile_picture(img):
+     cache_folder = Path("cache")
+     if not cache_folder.exists():
+         cache_folder.mkdir()
+
+     if img is None:
+         if Path("cache/pfp_me.png").exists():
+             Path("cache/pfp_me.png").unlink()
+     else:
+         img = make_thumbnail(img)
+         img.save(Path('cache/pfp_me.png'))
+         logger.info('Profile picture saved to "cache/pfp_me.png"')
+
+
+ def generate_character_yaml(name, greeting, context):
+     data = {
+         'name': name,
+         'greeting': greeting,
+         'context': context,
+     }
+
+     data = {k: v for k, v in data.items() if v}  # Strip falsy
+     return yaml.dump(data, sort_keys=False, width=float("inf"))
+
+
+ def generate_instruction_template_yaml(user, bot, context, turn_template):
+     data = {
+         'user': user,
+         'bot': bot,
+         'turn_template': turn_template,
+         'context': context,
+     }
+
+     data = {k: v for k, v in data.items() if v}  # Strip falsy
+     return yaml.dump(data, sort_keys=False, width=float("inf"))
+
+
+ def save_character(name, greeting, context, picture, filename):
+     if filename == "":
+         logger.error("The filename is empty, so the character will not be saved.")
+         return
+
+     data = generate_character_yaml(name, greeting, context)
+     filepath = Path(f'characters/{filename}.yaml')
+     save_file(filepath, data)
+     path_to_img = Path(f'characters/{filename}.png')
+     if picture is not None:
+         picture.save(path_to_img)
+         logger.info(f'Saved {path_to_img}.')
+
+
+ def delete_character(name, instruct=False):
+     for extension in ["yml", "yaml", "json"]:
+         delete_file(Path(f'characters/{name}.{extension}'))
+
+     delete_file(Path(f'characters/{name}.png'))
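To make the turn-template handling in generate_chat_prompt concrete, here is a small sketch of what get_turn_substrings returns for a typical instruct template. The state values are hypothetical placeholders, not taken from this commit.

# Hypothetical illustration of get_turn_substrings() (values made up for clarity).
from modules.chat import get_turn_substrings

state = {
    'turn_template': r'<|user|>\n<|user-message|>\n<|bot|>\n<|bot-message|>\n',
    'name1_instruct': 'USER',
    'name2_instruct': 'ASSISTANT',
    'name1': 'You',
    'name2': 'Bot',
}

subs = get_turn_substrings(state, instruct=True)
# subs['user_turn']          == 'USER\n<|user-message|>\n'
# subs['bot_turn']           == 'ASSISTANT\n<|bot-message|>\n'
# subs['user_turn_stripped'] == 'USER\n'
# subs['bot_turn_stripped']  == 'ASSISTANT\n'
# generate_chat_prompt() then fills <|user-message|>/<|bot-message|> for each history row.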
modules/evaluate.py ADDED
@@ -0,0 +1,154 @@
+ import datetime
+ from pathlib import Path
+
+ import pandas as pd
+ import torch
+ from datasets import load_dataset
+ from tqdm import tqdm
+
+ from modules import shared
+ from modules.models import load_model, unload_model
+ from modules.models_settings import (
+     get_model_settings_from_yamls,
+     update_model_parameters
+ )
+ from modules.text_generation import encode
+
+
+ def load_past_evaluations():
+     if Path('logs/evaluations.csv').exists():
+         df = pd.read_csv(Path('logs/evaluations.csv'), dtype=str)
+         df['Perplexity'] = pd.to_numeric(df['Perplexity'])
+         return df
+     else:
+         return pd.DataFrame(columns=['Model', 'LoRAs', 'Dataset', 'Perplexity', 'stride', 'max_length', 'Date', 'Comment'])
+
+
+ past_evaluations = load_past_evaluations()
+
+
+ def save_past_evaluations(df):
+     global past_evaluations
+     past_evaluations = df
+     filepath = Path('logs/evaluations.csv')
+     filepath.parent.mkdir(parents=True, exist_ok=True)
+     df.to_csv(filepath, index=False)
+
+
+ def calculate_perplexity(models, input_dataset, stride, _max_length):
+     '''
+     Based on:
+     https://huggingface.co/docs/transformers/perplexity#calculating-ppl-with-fixedlength-models
+     '''
+
+     global past_evaluations
+     cumulative_log = ''
+     cumulative_log += "Loading the input dataset...\n\n"
+     yield cumulative_log
+
+     # Copied from https://github.com/qwopqwop200/GPTQ-for-LLaMa/blob/triton/utils/datautils.py
+     if input_dataset == 'wikitext':
+         data = load_dataset('wikitext', 'wikitext-2-raw-v1', split='test')
+         text = "\n\n".join(data['text'])
+     elif input_dataset == 'ptb':
+         data = load_dataset('ptb_text_only', 'penn_treebank', split='validation')
+         text = "\n\n".join(data['sentence'])
+     elif input_dataset == 'ptb_new':
+         data = load_dataset('ptb_text_only', 'penn_treebank', split='test')
+         text = " ".join(data['sentence'])
+     else:
+         with open(Path(f'training/datasets/{input_dataset}.txt'), 'r', encoding='utf-8') as f:
+             text = f.read()
+
+     for model in models:
+         if is_in_past_evaluations(model, input_dataset, stride, _max_length):
+             cumulative_log += f"{model} has already been tested. Ignoring.\n\n"
+             yield cumulative_log
+             continue
+
+         if model != 'current model':
+             try:
+                 yield cumulative_log + f"Loading {model}...\n\n"
+                 model_settings = get_model_settings_from_yamls(model)
+                 shared.settings.update(model_settings)  # hijacking the interface defaults
+                 update_model_parameters(model_settings)  # hijacking the command-line arguments
+                 shared.model_name = model
+                 unload_model()
+                 shared.model, shared.tokenizer = load_model(shared.model_name)
+             except:
+                 cumulative_log += f"Failed to load {model}. Moving on.\n\n"
+                 yield cumulative_log
+                 continue
+
+         cumulative_log += f"Processing {shared.model_name}...\n\n"
+         yield cumulative_log + "Tokenizing the input dataset...\n\n"
+         encodings = encode(text, add_special_tokens=False)
+         seq_len = encodings.shape[1]
+         if _max_length:
+             max_length = _max_length
+         elif hasattr(shared.model.config, 'max_position_embeddings'):
+             max_length = shared.model.config.max_position_embeddings
+         else:
+             max_length = 2048
+
+         nlls = []
+         prev_end_loc = 0
+         for begin_loc in tqdm(range(0, seq_len, stride)):
+             yield cumulative_log + f"Evaluating... {100*begin_loc/seq_len:.2f}%"
+             end_loc = min(begin_loc + max_length, seq_len)
+             trg_len = end_loc - prev_end_loc  # may be different from stride on last loop
+             input_ids = encodings[:, begin_loc:end_loc]
+             target_ids = input_ids.clone()
+             target_ids[:, :-trg_len] = -100
+
+             with torch.no_grad():
+                 outputs = shared.model(input_ids=input_ids, labels=target_ids)
+
+                 # loss is calculated using CrossEntropyLoss which averages over valid labels
+                 # N.B. the model only calculates loss over trg_len - 1 labels, because it internally shifts the labels
+                 # to the left by 1.
+                 neg_log_likelihood = outputs.loss
+
+             nlls.append(neg_log_likelihood)
+
+             prev_end_loc = end_loc
+             if end_loc == seq_len:
+                 break
+
+         ppl = torch.exp(torch.stack(nlls).mean())
+         add_entry_to_past_evaluations(float(ppl), shared.model_name, input_dataset, stride, _max_length)
+         save_past_evaluations(past_evaluations)
+         cumulative_log += f"The perplexity for {shared.model_name} is: {float(ppl)}\n\n"
+         yield cumulative_log
+
+
+ def add_entry_to_past_evaluations(perplexity, model, dataset, stride, max_length):
+     global past_evaluations
+     entry = {
+         'Model': model,
+         'LoRAs': ', '.join(shared.lora_names) or '-',
+         'Dataset': dataset,
+         'Perplexity': perplexity,
+         'stride': str(stride),
+         'max_length': str(max_length),
+         'Date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+         'Comment': ''
+     }
+     past_evaluations = pd.concat([past_evaluations, pd.DataFrame([entry])], ignore_index=True)
+
+
+ def is_in_past_evaluations(model, dataset, stride, max_length):
+     entries = past_evaluations[(past_evaluations['Model'] == model) &
+                                (past_evaluations['Dataset'] == dataset) &
+                                (past_evaluations['max_length'] == str(max_length)) &
+                                (past_evaluations['stride'] == str(stride))]
+
+     if entries.shape[0] > 0:
+         return True
+     else:
+         return False
+
+
+ def generate_markdown_table():
+     sorted_df = past_evaluations.sort_values(by=['Dataset', 'stride', 'Perplexity', 'Date'])
+     return sorted_df
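As a quick sanity check of the sliding-window loop in calculate_perplexity, the snippet below (toy numbers, not from the code) prints the evaluation windows and how many trailing tokens of each window actually contribute to the loss.

# Toy walk-through of the stride/max_length windowing used above.
seq_len, stride, max_length = 10, 4, 6
prev_end_loc = 0
for begin_loc in range(0, seq_len, stride):
    end_loc = min(begin_loc + max_length, seq_len)
    trg_len = end_loc - prev_end_loc  # only these trailing tokens are scored
    print(begin_loc, end_loc, trg_len)
    prev_end_loc = end_loc
    if end_loc == seq_len:
        break
# Output: "0 6 6" then "4 10 4" -- overlapping context, non-overlapping targets.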
modules/html_generator.py ADDED
@@ -0,0 +1,273 @@
+ import os
+ import re
+ import time
+ from pathlib import Path
+
+ import markdown
+ from PIL import Image, ImageOps
+
+ from modules.utils import get_available_chat_styles
+
+ # This is to store the paths to the thumbnails of the profile pictures
+ image_cache = {}
+
+ with open(Path(__file__).resolve().parent / '../css/html_readable_style.css', 'r') as f:
+     readable_css = f.read()
+ with open(Path(__file__).resolve().parent / '../css/html_4chan_style.css', 'r') as css_f:
+     _4chan_css = css_f.read()
+ with open(Path(__file__).resolve().parent / '../css/html_instruct_style.css', 'r') as f:
+     instruct_css = f.read()
+
+ # Custom chat styles
+ chat_styles = {}
+ for k in get_available_chat_styles():
+     chat_styles[k] = open(Path(f'css/chat_style-{k}.css'), 'r').read()
+
+
+ def fix_newlines(string):
+     string = string.replace('\n', '\n\n')
+     string = re.sub(r"\n{3,}", "\n\n", string)
+     string = string.strip()
+     return string
+
+
+ def replace_blockquote(m):
+     return m.group().replace('\n', '\n> ').replace('\\begin{blockquote}', '').replace('\\end{blockquote}', '')
+
+
+ def convert_to_markdown(string):
+
+     # Blockquote
+     pattern = re.compile(r'\\begin{blockquote}(.*?)\\end{blockquote}', re.DOTALL)
+     string = pattern.sub(replace_blockquote, string)
+
+     # Code
+     string = string.replace('\\begin{code}', '```')
+     string = string.replace('\\end{code}', '```')
+     string = re.sub(r"(.)```", r"\1\n```", string)
+
+     result = ''
+     is_code = False
+     for line in string.split('\n'):
+         if line.lstrip(' ').startswith('```'):
+             is_code = not is_code
+
+         result += line
+         if is_code or line.startswith('|'):  # Don't add an extra \n for tables or code
+             result += '\n'
+         else:
+             result += '\n\n'
+
+     if is_code:
+         result = result + '```'  # Unfinished code block
+
+     string = result.strip()
+     return markdown.markdown(string, extensions=['fenced_code', 'tables'])
+
+
+ def generate_basic_html(string):
+     string = convert_to_markdown(string)
+     string = f'<style>{readable_css}</style><div class="container">{string}</div>'
+     return string
+
+
+ def process_post(post, c):
+     t = post.split('\n')
+     number = t[0].split(' ')[1]
+     if len(t) > 1:
+         src = '\n'.join(t[1:])
+     else:
+         src = ''
+     src = re.sub('>', '&gt;', src)
+     src = re.sub('(&gt;&gt;[0-9]*)', '<span class="quote">\\1</span>', src)
+     src = re.sub('\n', '<br>\n', src)
+     src = f'<blockquote class="message">{src}\n'
+     src = f'<span class="name">Anonymous </span> <span class="number">No.{number}</span>\n{src}'
+     return src
+
+
+ def generate_4chan_html(f):
+     posts = []
+     post = ''
+     c = -2
+     for line in f.splitlines():
+         line += "\n"
+         if line == '-----\n':
+             continue
+         elif line.startswith('--- '):
+             c += 1
+             if post != '':
+                 src = process_post(post, c)
+                 posts.append(src)
+             post = line
+         else:
+             post += line
+     if post != '':
+         src = process_post(post, c)
+         posts.append(src)
+
+     for i in range(len(posts)):
+         if i == 0:
+             posts[i] = f'<div class="op">{posts[i]}</div>\n'
+         else:
+             posts[i] = f'<div class="reply">{posts[i]}</div>\n'
+
+     output = ''
+     output += f'<style>{_4chan_css}</style><div id="parent"><div id="container">'
+     for post in posts:
+         output += post
+     output += '</div></div>'
+     output = output.split('\n')
+     for i in range(len(output)):
+         output[i] = re.sub(r'^(&gt;(.*?)(<br>|</div>))', r'<span class="greentext">\1</span>', output[i])
+         output[i] = re.sub(r'^<blockquote class="message">(&gt;(.*?)(<br>|</div>))', r'<blockquote class="message"><span class="greentext">\1</span>', output[i])
+     output = '\n'.join(output)
+
+     return output
+
+
+ def make_thumbnail(image):
+     image = image.resize((350, round(image.size[1] / image.size[0] * 350)), Image.Resampling.LANCZOS)
+     if image.size[1] > 470:
+         image = ImageOps.fit(image, (350, 470), Image.LANCZOS)
+
+     return image
+
+
+ def get_image_cache(path):
+     cache_folder = Path("cache")
+     if not cache_folder.exists():
+         cache_folder.mkdir()
+
+     mtime = os.stat(path).st_mtime
+     if (path in image_cache and mtime != image_cache[path][0]) or (path not in image_cache):
+         img = make_thumbnail(Image.open(path))
+         output_file = Path(f'cache/{path.name}_cache.png')
+         img.convert('RGB').save(output_file, format='PNG')
+         image_cache[path] = [mtime, output_file.as_posix()]
+
+     return image_cache[path][1]
+
+
+ def generate_instruct_html(history):
+     output = f'<style>{instruct_css}</style><div class="chat" id="chat">'
+     for i, _row in enumerate(history[::-1]):
+         row = [convert_to_markdown(entry) for entry in _row]
+
+         output += f"""
+               <div class="assistant-message">
+                 <div class="text">
+                   <div class="message-body">
+                     {row[1]}
+                   </div>
+                 </div>
+               </div>
+             """
+
+         if len(row[0]) == 0:  # don't display empty user messages
+             continue
+
+         output += f"""
+               <div class="user-message">
+                 <div class="text">
+                   <div class="message-body">
+                     {row[0]}
+                   </div>
+                 </div>
+               </div>
+             """
+
+     output += "</div>"
+
+     return output
+
+
+ def generate_cai_chat_html(history, name1, name2, style, reset_cache=False):
+     output = f'<style>{chat_styles[style]}</style><div class="chat" id="chat">'
+
+     # We use ?name2 and ?time.time() to force the browser to reset caches
+     img_bot = f'<img src="file/cache/pfp_character.png?{name2}">' if Path("cache/pfp_character.png").exists() else ''
+     img_me = f'<img src="file/cache/pfp_me.png?{time.time() if reset_cache else ""}">' if Path("cache/pfp_me.png").exists() else ''
+
+     for i, _row in enumerate(history[::-1]):
+         row = [convert_to_markdown(entry) for entry in _row]
+
+         output += f"""
+               <div class="message">
+                 <div class="circle-bot">
+                   {img_bot}
+                 </div>
+                 <div class="text">
+                   <div class="username">
+                     {name2}
+                   </div>
+                   <div class="message-body">
+                     {row[1]}
+                   </div>
+                 </div>
+               </div>
+             """
+
+         if len(row[0]) == 0:  # don't display empty user messages
+             continue
+
+         output += f"""
+               <div class="message">
+                 <div class="circle-you">
+                   {img_me}
+                 </div>
+                 <div class="text">
+                   <div class="username">
+                     {name1}
+                   </div>
+                   <div class="message-body">
+                     {row[0]}
+                   </div>
+                 </div>
+               </div>
+             """
+
+     output += "</div>"
+     return output
+
+
+ def generate_chat_html(history, name1, name2, reset_cache=False):
+     output = f'<style>{chat_styles["wpp"]}</style><div class="chat" id="chat">'
+
+     for i, _row in enumerate(history[::-1]):
+         row = [convert_to_markdown(entry) for entry in _row]
+
+         output += f"""
+               <div class="message">
+                 <div class="text-bot">
+                   <div class="message-body">
+                     {row[1]}
+                   </div>
+                 </div>
+               </div>
+             """
+
+         if len(row[0]) == 0:  # don't display empty user messages
+             continue
+
+         output += f"""
+               <div class="message">
+                 <div class="text-you">
+                   <div class="message-body">
+                     {row[0]}
+                   </div>
+                 </div>
+               </div>
+             """
+
+     output += "</div>"
+     return output
+
+
+ def chat_html_wrapper(history, name1, name2, mode, style, reset_cache=False):
+     if mode == 'instruct':
+         return generate_instruct_html(history['visible'])
+     elif style == 'wpp':
+         return generate_chat_html(history['visible'], name1, name2)
+     else:
+         return generate_cai_chat_html(history['visible'], name1, name2, style, reset_cache)
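A minimal sketch of how chat_html_wrapper is meant to be called. The history contents and the 'cai-chat' style name are assumptions for illustration; any css/chat_style-*.css file present in the repo is a valid style.

# Hypothetical call (history values made up); requires the css/ folder from the repo.
from modules.html_generator import chat_html_wrapper

history = {
    'internal': [['Hello', 'Hi there! How can I help?']],
    'visible': [['Hello', 'Hi there! How can I help?']],
}

html = chat_html_wrapper(history, name1='You', name2='Assistant', mode='chat', style='cai-chat')
# `html` is a self-contained <style> + <div class="chat"> block that the UI renders directly.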
modules/loaders.py ADDED
@@ -0,0 +1,291 @@
+ import functools
+
+ import gradio as gr
+
+ from modules import shared
+
+ loaders_and_params = {
+     'AutoGPTQ': [
+         'triton',
+         'no_inject_fused_attention',
+         'no_inject_fused_mlp',
+         'no_use_cuda_fp16',
+         'wbits',
+         'groupsize',
+         'desc_act',
+         'gpu_memory',
+         'cpu_memory',
+         'cpu',
+         'disk',
+         'auto_devices',
+         'trust_remote_code',
+         'autogptq_info',
+     ],
+     'GPTQ-for-LLaMa': [
+         'wbits',
+         'groupsize',
+         'model_type',
+         'pre_layer',
+         'gptq_for_llama_info',
+     ],
+     'llama.cpp': [
+         'n_ctx',
+         'n_gqa',
+         'rms_norm_eps',
+         'n_gpu_layers',
+         'n_batch',
+         'threads',
+         'no_mmap',
+         'low_vram',
+         'mlock',
+         'llama_cpp_seed',
+         'compress_pos_emb',
+         'alpha_value',
+     ],
+     'llamacpp_HF': [
+         'n_ctx',
+         'n_gqa',
+         'rms_norm_eps',
+         'n_gpu_layers',
+         'n_batch',
+         'threads',
+         'no_mmap',
+         'low_vram',
+         'mlock',
+         'llama_cpp_seed',
+         'compress_pos_emb',
+         'alpha_value',
+         'llamacpp_HF_info',
+     ],
+     'Transformers': [
+         'cpu_memory',
+         'gpu_memory',
+         'trust_remote_code',
+         'load_in_8bit',
+         'bf16',
+         'cpu',
+         'disk',
+         'auto_devices',
+         'load_in_4bit',
+         'use_double_quant',
+         'quant_type',
+         'compute_dtype',
+         'trust_remote_code',
+         'transformers_info'
+     ],
+     'ExLlama': [
+         'gpu_split',
+         'max_seq_len',
+         'compress_pos_emb',
+         'alpha_value',
+         'exllama_info',
+     ],
+     'ExLlama_HF': [
+         'gpu_split',
+         'max_seq_len',
+         'compress_pos_emb',
+         'alpha_value',
+         'exllama_HF_info',
+     ]
+ }
+
+ loaders_samplers = {
+     'Transformers': {
+         'temperature',
+         'top_p',
+         'top_k',
+         'typical_p',
+         'epsilon_cutoff',
+         'eta_cutoff',
+         'tfs',
+         'top_a',
+         'repetition_penalty',
+         'repetition_penalty_range',
+         'encoder_repetition_penalty',
+         'no_repeat_ngram_size',
+         'min_length',
+         'seed',
+         'do_sample',
+         'penalty_alpha',
+         'num_beams',
+         'length_penalty',
+         'early_stopping',
+         'mirostat_mode',
+         'mirostat_tau',
+         'mirostat_eta',
+         'ban_eos_token',
+         'add_bos_token',
+         'skip_special_tokens',
+     },
+     'ExLlama_HF': {
+         'temperature',
+         'top_p',
+         'top_k',
+         'typical_p',
+         'epsilon_cutoff',
+         'eta_cutoff',
+         'tfs',
+         'top_a',
+         'repetition_penalty',
+         'repetition_penalty_range',
+         'encoder_repetition_penalty',
+         'no_repeat_ngram_size',
+         'min_length',
+         'seed',
+         'do_sample',
+         'mirostat_mode',
+         'mirostat_tau',
+         'mirostat_eta',
+         'ban_eos_token',
+         'add_bos_token',
+         'skip_special_tokens',
+     },
+     'ExLlama': {
+         'temperature',
+         'top_p',
+         'top_k',
+         'typical_p',
+         'repetition_penalty',
+         'repetition_penalty_range',
+         'seed',
+         'ban_eos_token',
+     },
+     'AutoGPTQ': {
+         'temperature',
+         'top_p',
+         'top_k',
+         'typical_p',
+         'epsilon_cutoff',
+         'eta_cutoff',
+         'tfs',
+         'top_a',
+         'repetition_penalty',
+         'repetition_penalty_range',
+         'encoder_repetition_penalty',
+         'no_repeat_ngram_size',
+         'min_length',
+         'seed',
+         'do_sample',
+         'penalty_alpha',
+         'num_beams',
+         'length_penalty',
+         'early_stopping',
+         'mirostat_mode',
+         'mirostat_tau',
+         'mirostat_eta',
+         'ban_eos_token',
+         'add_bos_token',
+         'skip_special_tokens',
+     },
+     'GPTQ-for-LLaMa': {
+         'temperature',
+         'top_p',
+         'top_k',
+         'typical_p',
+         'epsilon_cutoff',
+         'eta_cutoff',
+         'tfs',
+         'top_a',
+         'repetition_penalty',
+         'repetition_penalty_range',
+         'encoder_repetition_penalty',
+         'no_repeat_ngram_size',
+         'min_length',
+         'seed',
+         'do_sample',
+         'penalty_alpha',
+         'num_beams',
+         'length_penalty',
+         'early_stopping',
+         'mirostat_mode',
+         'mirostat_tau',
+         'mirostat_eta',
+         'ban_eos_token',
+         'add_bos_token',
+         'skip_special_tokens',
+     },
+     'llama.cpp': {
+         'temperature',
+         'top_p',
+         'top_k',
+         'tfs',
+         'repetition_penalty',
+         'mirostat_mode',
+         'mirostat_tau',
+         'mirostat_eta',
+         'ban_eos_token',
+     },
+     'llamacpp_HF': {
+         'temperature',
+         'top_p',
+         'top_k',
+         'typical_p',
+         'epsilon_cutoff',
+         'eta_cutoff',
+         'tfs',
+         'top_a',
+         'repetition_penalty',
+         'repetition_penalty_range',
+         'encoder_repetition_penalty',
+         'no_repeat_ngram_size',
+         'min_length',
+         'seed',
+         'do_sample',
+         'mirostat_mode',
+         'mirostat_tau',
+         'mirostat_eta',
+         'ban_eos_token',
+         'add_bos_token',
+         'skip_special_tokens',
+     },
+ }
+
+
+ @functools.cache
+ def list_all_samplers():
+     all_samplers = set()
+     for k in loaders_samplers:
+         for sampler in loaders_samplers[k]:
+             all_samplers.add(sampler)
+
+     return sorted(all_samplers)
+
+
+ def blacklist_samplers(loader):
+     all_samplers = list_all_samplers()
+     if loader == 'All':
+         return [gr.update(visible=True) for sampler in all_samplers]
+     else:
+         return [gr.update(visible=True) if sampler in loaders_samplers[loader] else gr.update(visible=False) for sampler in all_samplers]
+
+
+ def get_gpu_memory_keys():
+     return [k for k in shared.gradio if k.startswith('gpu_memory')]
+
+
+ @functools.cache
+ def get_all_params():
+     all_params = set()
+     for k in loaders_and_params:
+         for el in loaders_and_params[k]:
+             all_params.add(el)
+
+     if 'gpu_memory' in all_params:
+         all_params.remove('gpu_memory')
+         for k in get_gpu_memory_keys():
+             all_params.add(k)
+
+     return sorted(all_params)
+
+
+ def make_loader_params_visible(loader):
+     params = []
+     all_params = get_all_params()
+     if loader in loaders_and_params:
+         params = loaders_and_params[loader]
+
+         if 'gpu_memory' in params:
+             params.remove('gpu_memory')
+             params += get_gpu_memory_keys()
+
+     return [gr.update(visible=True) if k in params else gr.update(visible=False) for k in all_params]
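How these helpers are typically wired into the UI, as a rough sketch: the loader dropdown's change event maps onto the visibility updates returned by make_loader_params_visible and blacklist_samplers. The widget lookups below assume the usual shared.gradio registry and are illustrative only, not part of this upload.

# Hypothetical Gradio wiring sketch (commented out; component names are assumptions).
import gradio as gr

from modules import loaders, shared

# loader = gr.Dropdown(choices=list(loaders.loaders_and_params), value='Transformers')
# param_widgets = [shared.gradio[k] for k in loaders.get_all_params()]
# sampler_widgets = [shared.gradio[k] for k in loaders.list_all_samplers()]
# loader.change(loaders.make_loader_params_visible, loader, param_widgets)
# loader.change(loaders.blacklist_samplers, loader, sampler_widgets)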
modules/models.py ADDED
@@ -0,0 +1,343 @@
+ import gc
+ import os
+ import re
+ import time
+ from pathlib import Path
+ import hashlib
+
+ import torch
+ import transformers
+ from accelerate import infer_auto_device_map, init_empty_weights
+ from transformers import (
+     AutoConfig,
+     AutoModel,
+     AutoModelForCausalLM,
+     AutoModelForSeq2SeqLM,
+     AutoTokenizer,
+     BitsAndBytesConfig,
+ )
+
+ import modules.shared as shared
+ from modules import llama_attn_hijack, sampler_hijack
+ from modules.logging_colors import logger
+ from modules.models_settings import infer_loader
+
+ transformers.logging.set_verbosity_error()
+
+ local_rank = None
+ if shared.args.deepspeed:
+     import deepspeed
+     from transformers.deepspeed import (
+         HfDeepSpeedConfig,
+         is_deepspeed_zero3_enabled
+     )
+
+     from modules.deepspeed_parameters import generate_ds_config
+
+     # Distributed setup
+     local_rank = shared.args.local_rank if shared.args.local_rank is not None else int(os.getenv("LOCAL_RANK", "0"))
+     world_size = int(os.getenv("WORLD_SIZE", "1"))
+     torch.cuda.set_device(local_rank)
+     deepspeed.init_distributed()
+     ds_config = generate_ds_config(shared.args.bf16, 1 * world_size, shared.args.nvme_offload_dir)
+     dschf = HfDeepSpeedConfig(ds_config)  # Keep this object alive for the Transformers integration
+
+ sampler_hijack.hijack_samplers()
+
+
+ def load_model(model_name, loader=None):
+     logger.info(f"Loading {model_name}...")
+     t0 = time.time()
+
+     shared.is_seq2seq = False
+     load_func_map = {
+         'Transformers': huggingface_loader,
+         'AutoGPTQ': AutoGPTQ_loader,
+         'GPTQ-for-LLaMa': GPTQ_loader,
+         'llama.cpp': llamacpp_loader,
+         'llamacpp_HF': llamacpp_HF_loader,
+         'RWKV': RWKV_loader,
+         'ExLlama': ExLlama_loader,
+         'ExLlama_HF': ExLlama_HF_loader
+     }
+
+     p = Path(model_name)
+     if p.exists():
+         model_name = p.parts[-1]
+
+     if loader is None:
+         if shared.args.loader is not None:
+             loader = shared.args.loader
+         else:
+             loader = infer_loader(model_name)
+             if loader is None:
+                 logger.error('The path to the model does not exist. Exiting.')
+                 return None, None
+
+     shared.args.loader = loader
+     output = load_func_map[loader](model_name)
+     if type(output) is tuple:
+         model, tokenizer = output
+     else:
+         model = output
+         if model is None:
+             return None, None
+         else:
+             tokenizer = load_tokenizer(model_name, model)
+
+     # Hijack attention with xformers
+     if any((shared.args.xformers, shared.args.sdp_attention)):
+         llama_attn_hijack.hijack_llama_attention()
+
+     logger.info(f"Loaded the model in {(time.time()-t0):.2f} seconds.\n")
+     return model, tokenizer
+
+
+ def load_tokenizer(model_name, model):
+     tokenizer = None
+     path_to_model = Path(f"{shared.args.model_dir}/{model_name}/")
+     if any(s in model_name.lower() for s in ['gpt-4chan', 'gpt4chan']) and Path(f"{shared.args.model_dir}/gpt-j-6B/").exists():
+         tokenizer = AutoTokenizer.from_pretrained(Path(f"{shared.args.model_dir}/gpt-j-6B/"))
+     elif path_to_model.exists():
+         try:
+             tokenizer = AutoTokenizer.from_pretrained(
+                 path_to_model,
+                 trust_remote_code=shared.args.trust_remote_code,
+                 use_fast=False
+             )
+         except ValueError:
+             tokenizer = AutoTokenizer.from_pretrained(
+                 path_to_model,
+                 trust_remote_code=shared.args.trust_remote_code,
+                 use_fast=True
+             )
+
+         if tokenizer.__class__.__name__ == 'LlamaTokenizer':
+             pairs = [
+                 ['tokenizer_config.json', '516c6167c884793a738c440e29ccb80c15e1493ffc965affc69a1a8ddef4572a'],
+                 ['special_tokens_map.json', 'ff3b4a612c4e447acb02d40071bddd989fe0da87eb5b7fe0dbadfc4f74de7531']
+             ]
+
+             for pair in pairs:
+                 p = path_to_model / pair[0]
+                 if p.exists():
+                     with open(p, "rb") as f:
+                         bytes = f.read()
+
+                     file_hash = hashlib.sha256(bytes).hexdigest()
+                     if file_hash != pair[1]:
+                         logger.warning(f"{p} is different from the original LlamaTokenizer file. It is either customized or outdated.")
+
+     return tokenizer
+
+
+ def huggingface_loader(model_name):
+     path_to_model = Path(f'{shared.args.model_dir}/{model_name}')
+     if 'chatglm' in model_name.lower():
+         LoaderClass = AutoModel
+     else:
+         config = AutoConfig.from_pretrained(path_to_model, trust_remote_code=shared.args.trust_remote_code)
+         if config.to_dict().get("is_encoder_decoder", False):
+             LoaderClass = AutoModelForSeq2SeqLM
+             shared.is_seq2seq = True
+         else:
+             LoaderClass = AutoModelForCausalLM
+
+     # Load the model in simple 16-bit mode by default
+     if not any([shared.args.cpu, shared.args.load_in_8bit, shared.args.load_in_4bit, shared.args.auto_devices, shared.args.disk, shared.args.deepspeed, shared.args.gpu_memory is not None, shared.args.cpu_memory is not None]):
+         model = LoaderClass.from_pretrained(Path(f"{shared.args.model_dir}/{model_name}"), low_cpu_mem_usage=True, torch_dtype=torch.bfloat16 if shared.args.bf16 else torch.float16, trust_remote_code=shared.args.trust_remote_code)
+         if torch.backends.mps.is_available():
+             device = torch.device('mps')
+             model = model.to(device)
+         else:
+             model = model.cuda()
+
+     # DeepSpeed ZeRO-3
+     elif shared.args.deepspeed:
+         model = LoaderClass.from_pretrained(Path(f"{shared.args.model_dir}/{model_name}"), torch_dtype=torch.bfloat16 if shared.args.bf16 else torch.float16)
+         model = deepspeed.initialize(model=model, config_params=ds_config, model_parameters=None, optimizer=None, lr_scheduler=None)[0]
+         model.module.eval()  # Inference
+         logger.info(f"DeepSpeed ZeRO-3 is enabled: {is_deepspeed_zero3_enabled()}")
+
+     # Custom
+     else:
+         params = {
+             "low_cpu_mem_usage": True,
+             "trust_remote_code": shared.args.trust_remote_code
+         }
+
+         if not any((shared.args.cpu, torch.cuda.is_available(), torch.backends.mps.is_available())):
+             logger.warning("torch.cuda.is_available() returned False. This means that no GPU has been detected. Falling back to CPU mode.")
+             shared.args.cpu = True
+
+         if shared.args.cpu:
+             params["torch_dtype"] = torch.float32
+         else:
+             params["device_map"] = 'auto'
+             if shared.args.load_in_4bit:
+
+                 # See https://github.com/huggingface/transformers/pull/23479/files
+                 # and https://huggingface.co/blog/4bit-transformers-bitsandbytes
+                 quantization_config_params = {
+                     'load_in_4bit': True,
+                     'bnb_4bit_compute_dtype': eval("torch.{}".format(shared.args.compute_dtype)) if shared.args.compute_dtype in ["bfloat16", "float16", "float32"] else None,
+                     'bnb_4bit_quant_type': shared.args.quant_type,
+                     'bnb_4bit_use_double_quant': shared.args.use_double_quant,
+                 }
+
+                 logger.warning("Using the following 4-bit params: " + str(quantization_config_params))
+                 params['quantization_config'] = BitsAndBytesConfig(**quantization_config_params)
+
+             elif shared.args.load_in_8bit and any((shared.args.auto_devices, shared.args.gpu_memory)):
+                 params['quantization_config'] = BitsAndBytesConfig(load_in_8bit=True, llm_int8_enable_fp32_cpu_offload=True)
+             elif shared.args.load_in_8bit:
+                 params['quantization_config'] = BitsAndBytesConfig(load_in_8bit=True)
+             elif shared.args.bf16:
+                 params["torch_dtype"] = torch.bfloat16
+             else:
+                 params["torch_dtype"] = torch.float16
+
+             params['max_memory'] = get_max_memory_dict()
+             if shared.args.disk:
+                 params["offload_folder"] = shared.args.disk_cache_dir
+
+         checkpoint = Path(f'{shared.args.model_dir}/{model_name}')
+         if shared.args.load_in_8bit and params.get('max_memory', None) is not None and params['device_map'] == 'auto':
+             config = AutoConfig.from_pretrained(checkpoint, trust_remote_code=shared.args.trust_remote_code)
+             with init_empty_weights():
+                 model = LoaderClass.from_config(config, trust_remote_code=shared.args.trust_remote_code)
+
+             model.tie_weights()
+             params['device_map'] = infer_auto_device_map(
+                 model,
+                 dtype=torch.int8,
+                 max_memory=params['max_memory'],
+                 no_split_module_classes=model._no_split_modules
+             )
+
+         model = LoaderClass.from_pretrained(checkpoint, **params)
+
+     return model
+
+
+ def RWKV_loader(model_name):
+     from modules.RWKV import RWKVModel, RWKVTokenizer
+
+     model = RWKVModel.from_pretrained(Path(f'{shared.args.model_dir}/{model_name}'), dtype="fp32" if shared.args.cpu else "bf16" if shared.args.bf16 else "fp16", device="cpu" if shared.args.cpu else "cuda")
+     tokenizer = RWKVTokenizer.from_pretrained(Path(shared.args.model_dir))
+     return model, tokenizer
+
+
+ def llamacpp_loader(model_name):
+     from modules.llamacpp_model import LlamaCppModel
+
+     path = Path(f'{shared.args.model_dir}/{model_name}')
+     if path.is_file():
+         model_file = path
+     else:
+         model_file = list(Path(f'{shared.args.model_dir}/{model_name}').glob('*ggml*.bin'))[0]
+
+     logger.info(f"llama.cpp weights detected: {model_file}\n")
+     model, tokenizer = LlamaCppModel.from_pretrained(model_file)
+     return model, tokenizer
+
+
+ def llamacpp_HF_loader(model_name):
+     from modules.llamacpp_hf import LlamacppHF
+
+     for fname in ["oobabooga_llama-tokenizer", "llama-tokenizer"]:
+         path = Path(f'{shared.args.model_dir}/{fname}')
+         if path.exists():
+             break
+     else:
+         logger.error("Could not load the model because a tokenizer in transformers format was not found. Please download oobabooga/llama-tokenizer.")
+         return None, None
+
+     tokenizer = AutoTokenizer.from_pretrained(
+         path,
+         trust_remote_code=shared.args.trust_remote_code,
+         use_fast=False
+     )
+
+     model = LlamacppHF.from_pretrained(model_name)
+     return model, tokenizer
+
+
+ def GPTQ_loader(model_name):
+
+     # Monkey patch
+     if shared.args.monkey_patch:
+         logger.warning("Applying the monkey patch for using LoRAs with GPTQ models. It may cause undefined behavior outside its intended scope.")
+         from modules.monkey_patch_gptq_lora import load_model_llama
+
+         model, _ = load_model_llama(model_name)
+
+     # No monkey patch
+     else:
+         import modules.GPTQ_loader
+
+         model = modules.GPTQ_loader.load_quantized(model_name)
+
+     return model
+
+
+ def AutoGPTQ_loader(model_name):
+     import modules.AutoGPTQ_loader
+
+     return modules.AutoGPTQ_loader.load_quantized(model_name)
+
+
+ def ExLlama_loader(model_name):
+     from modules.exllama import ExllamaModel
+
+     model, tokenizer = ExllamaModel.from_pretrained(model_name)
+     return model, tokenizer
+
+
+ def ExLlama_HF_loader(model_name):
298
+ from modules.exllama_hf import ExllamaHF
299
+
300
+ return ExllamaHF.from_pretrained(model_name)
301
+
302
+
303
+ def get_max_memory_dict():
304
+ max_memory = {}
305
+ if shared.args.gpu_memory:
306
+ memory_map = list(map(lambda x: x.strip(), shared.args.gpu_memory))
307
+ for i in range(len(memory_map)):
308
+ max_memory[i] = f'{memory_map[i]}GiB' if not re.match('.*ib$', memory_map[i].lower()) else memory_map[i]
309
+
310
+ max_cpu_memory = shared.args.cpu_memory.strip() if shared.args.cpu_memory is not None else '99GiB'
311
+ max_memory['cpu'] = f'{max_cpu_memory}GiB' if not re.match('.*ib$', max_cpu_memory.lower()) else max_cpu_memory
312
+
313
+ # If --auto-devices is provided standalone, try to get a reasonable value
314
+ # for the maximum memory of device :0
315
+ elif shared.args.auto_devices:
316
+ total_mem = (torch.cuda.get_device_properties(0).total_memory / (1024 * 1024))
317
+ suggestion = round((total_mem - 1000) / 1000) * 1000
318
+ if total_mem - suggestion < 800:
319
+ suggestion -= 1000
320
+
321
+ suggestion = int(round(suggestion / 1000))
322
+ logger.warning(f"Auto-assigning --gpu-memory {suggestion} for your GPU to try to prevent out-of-memory errors. You can manually set other values.")
323
+ max_memory = {0: f'{suggestion}GiB', 'cpu': f'{shared.args.cpu_memory or 99}GiB'}
324
+
325
+ return max_memory if len(max_memory) > 0 else None
326
+
327
+
328
+ def clear_torch_cache():
329
+ gc.collect()
330
+ if not shared.args.cpu:
331
+ torch.cuda.empty_cache()
332
+
333
+
334
+ def unload_model():
335
+ shared.model = shared.tokenizer = None
336
+ shared.lora_names = []
337
+ shared.model_dirty_from_training = False
338
+ clear_torch_cache()
339
+
340
+
341
+ def reload_model():
342
+ unload_model()
343
+ shared.model, shared.tokenizer = load_model(shared.model_name)
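Note: as a minimal, self-contained sketch of what the 4-bit branch of huggingface_loader() above ends up asking transformers for, the snippet below builds the same kind of BitsAndBytesConfig and passes it to from_pretrained. The model path and the concrete dtype/quant choices are illustrative assumptions (in the loader they come from shared.args), not values taken from this repo.

    import torch
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    # Illustrative settings; huggingface_loader() derives these from
    # shared.args.compute_dtype, shared.args.quant_type and shared.args.use_double_quant.
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type='nf4',
        bnb_4bit_use_double_quant=True,
    )

    model = AutoModelForCausalLM.from_pretrained(
        'models/some-llama-model',        # hypothetical local path under --model-dir
        device_map='auto',
        low_cpu_mem_usage=True,
        quantization_config=quantization_config,
    )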
modules/models_settings.py ADDED
@@ -0,0 +1,137 @@
1
+ import re
2
+ from pathlib import Path
3
+
4
+ import yaml
5
+
6
+ from modules import loaders, shared, ui
7
+
8
+
9
+ def get_model_settings_from_yamls(model):
10
+ settings = shared.model_config
11
+ model_settings = {}
12
+ for pat in settings:
13
+ if re.match(pat.lower(), model.lower()):
14
+ for k in settings[pat]:
15
+ model_settings[k] = settings[pat][k]
16
+
17
+ return model_settings
18
+
19
+
20
+ def infer_loader(model_name):
21
+ path_to_model = Path(f'{shared.args.model_dir}/{model_name}')
22
+ model_settings = get_model_settings_from_yamls(model_name)
23
+ if not path_to_model.exists():
24
+ loader = None
25
+ elif Path(f'{shared.args.model_dir}/{model_name}/quantize_config.json').exists() or ('wbits' in model_settings and type(model_settings['wbits']) is int and model_settings['wbits'] > 0):
26
+ loader = 'AutoGPTQ'
27
+ elif len(list(path_to_model.glob('*ggml*.bin'))) > 0:
28
+ loader = 'llama.cpp'
29
+ elif re.match('.*ggml.*\.bin', model_name.lower()):
30
+ loader = 'llama.cpp'
31
+ elif re.match('.*rwkv.*\.pth', model_name.lower()):
32
+ loader = 'RWKV'
33
+ else:
34
+ loader = 'Transformers'
35
+
36
+ return loader
37
+
38
+
39
+ # UI: update the command-line arguments based on the interface values
40
+ def update_model_parameters(state, initial=False):
41
+ elements = ui.list_model_elements() # the names of the parameters
42
+ gpu_memories = []
43
+
44
+ for i, element in enumerate(elements):
45
+ if element not in state:
46
+ continue
47
+
48
+ value = state[element]
49
+ if element.startswith('gpu_memory'):
50
+ gpu_memories.append(value)
51
+ continue
52
+
53
+ if initial and vars(shared.args)[element] != vars(shared.args_defaults)[element]:
54
+ continue
55
+
56
+ # Setting null defaults
57
+ if element in ['wbits', 'groupsize', 'model_type'] and value == 'None':
58
+ value = vars(shared.args_defaults)[element]
59
+ elif element in ['cpu_memory'] and value == 0:
60
+ value = vars(shared.args_defaults)[element]
61
+
62
+ # Making some simple conversions
63
+ if element in ['wbits', 'groupsize', 'pre_layer']:
64
+ value = int(value)
65
+ elif element == 'cpu_memory' and value is not None:
66
+ value = f"{value}MiB"
67
+
68
+ if element in ['pre_layer']:
69
+ value = [value] if value > 0 else None
70
+
71
+ setattr(shared.args, element, value)
72
+
73
+ found_positive = False
74
+ for i in gpu_memories:
75
+ if i > 0:
76
+ found_positive = True
77
+ break
78
+
79
+ if not (initial and vars(shared.args)['gpu_memory'] != vars(shared.args_defaults)['gpu_memory']):
80
+ if found_positive:
81
+ shared.args.gpu_memory = [f"{i}MiB" for i in gpu_memories]
82
+ else:
83
+ shared.args.gpu_memory = None
84
+
85
+
86
+ # UI: update the state variable with the model settings
87
+ def apply_model_settings_to_state(model, state):
88
+ model_settings = get_model_settings_from_yamls(model)
89
+ if 'loader' not in model_settings:
90
+ loader = infer_loader(model)
91
+ if 'wbits' in model_settings and type(model_settings['wbits']) is int and model_settings['wbits'] > 0:
92
+ loader = 'AutoGPTQ'
93
+
94
+ # If the user is using an alternative GPTQ loader, let them keep using it
95
+ if not (loader == 'AutoGPTQ' and state['loader'] in ['GPTQ-for-LLaMa', 'ExLlama', 'ExLlama_HF']):
96
+ state['loader'] = loader
97
+
98
+ for k in model_settings:
99
+ if k in state:
100
+ if k in ['wbits', 'groupsize']:
101
+ state[k] = str(model_settings[k])
102
+ else:
103
+ state[k] = model_settings[k]
104
+
105
+ return state
106
+
107
+
108
+ # Save the settings for this model to models/config-user.yaml
109
+ def save_model_settings(model, state):
110
+ if model == 'None':
111
+ yield ("Not saving the settings because no model is loaded.")
112
+ return
113
+
114
+ with Path(f'{shared.args.model_dir}/config-user.yaml') as p:
115
+ if p.exists():
116
+ user_config = yaml.safe_load(open(p, 'r').read())
117
+ else:
118
+ user_config = {}
119
+
120
+ model_regex = model + '$' # For exact matches
121
+ for _dict in [user_config, shared.model_config]:
122
+ if model_regex not in _dict:
123
+ _dict[model_regex] = {}
124
+
125
+ if model_regex not in user_config:
126
+ user_config[model_regex] = {}
127
+
128
+ for k in ui.list_model_elements():
129
+ if k == 'loader' or k in loaders.loaders_and_params[state['loader']]:
130
+ user_config[model_regex][k] = state[k]
131
+ shared.model_config[model_regex][k] = state[k]
132
+
133
+ output = yaml.dump(user_config, sort_keys=False)
134
+ with open(p, 'w') as f:
135
+ f.write(output)
136
+
137
+ yield (f"Settings for {model} saved to {p}")
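For orientation, save_model_settings() above keys config-user.yaml by a regex built from the model name (the trailing "$" makes it an exact match) and stores one settings dict per model. A small sketch of that structure, with a hypothetical model name and field values:

    import yaml

    # Hypothetical example of what ends up in models/config-user.yaml.
    user_config = {
        'my-model-4bit-128g$': {      # regex key: exact match on the model name
            'loader': 'AutoGPTQ',
            'wbits': 4,
            'groupsize': 128,
        }
    }
    print(yaml.dump(user_config, sort_keys=False))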
modules/presets.py ADDED
@@ -0,0 +1,66 @@
1
+ import functools
2
+ from pathlib import Path
3
+
4
+ import yaml
5
+
6
+
7
+ def default_preset():
8
+ return {
9
+ 'do_sample': True,
10
+ 'temperature': 1,
11
+ 'top_p': 1,
12
+ 'typical_p': 1,
13
+ 'epsilon_cutoff': 0,
14
+ 'eta_cutoff': 0,
15
+ 'tfs': 1,
16
+ 'top_a': 0,
17
+ 'repetition_penalty': 1,
18
+ 'repetition_penalty_range': 0,
19
+ 'encoder_repetition_penalty': 1,
20
+ 'top_k': 0,
21
+ 'num_beams': 1,
22
+ 'penalty_alpha': 0,
23
+ 'min_length': 0,
24
+ 'length_penalty': 1,
25
+ 'no_repeat_ngram_size': 0,
26
+ 'early_stopping': False,
27
+ 'mirostat_mode': 0,
28
+ 'mirostat_tau': 5.0,
29
+ 'mirostat_eta': 0.1,
30
+ }
31
+
32
+
33
+ def load_preset(name):
34
+ generate_params = default_preset()
35
+ if name not in ['None', None, '']:
36
+ with open(Path(f'presets/{name}.yaml'), 'r') as infile:
37
+ preset = yaml.safe_load(infile)
38
+
39
+ for k in preset:
40
+ generate_params[k] = preset[k]
41
+
42
+ generate_params['temperature'] = min(1.99, generate_params['temperature'])
43
+ return generate_params
44
+
45
+
46
+ @functools.cache
47
+ def load_preset_memoized(name):
48
+ return load_preset(name)
49
+
50
+
51
+ def load_preset_for_ui(name, state):
52
+ generate_params = load_preset(name)
53
+ state.update(generate_params)
54
+ return state, *[generate_params[k] for k in ['do_sample', 'temperature', 'top_p', 'typical_p', 'epsilon_cutoff', 'eta_cutoff', 'repetition_penalty', 'repetition_penalty_range', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'tfs', 'top_a']]
55
+
56
+
57
+ def generate_preset_yaml(state):
58
+ defaults = default_preset()
59
+ data = {k: state[k] for k in ['do_sample', 'temperature', 'top_p', 'typical_p', 'epsilon_cutoff', 'eta_cutoff', 'repetition_penalty', 'repetition_penalty_range', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'tfs', 'top_a']}
60
+
61
+ # Remove entries that are identical to the defaults
62
+ for k in list(data.keys()):
63
+ if data[k] == defaults[k]:
64
+ del data[k]
65
+
66
+ return yaml.dump(data, sort_keys=False)
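The two directions in presets.py mirror each other: load_preset() overlays a preset file on top of default_preset(), and generate_preset_yaml() strips anything identical to the defaults before dumping. A self-contained sketch of that round trip, with made-up preset values and only a subset of the default keys:

    import yaml

    defaults = {'do_sample': True, 'temperature': 1, 'top_p': 1, 'top_k': 0}

    # Hypothetical contents of a preset YAML file (a subset of the keys above).
    preset = {'temperature': 0.7, 'top_p': 0.9}

    # load_preset(): start from the defaults, then overlay the preset values.
    params = dict(defaults)
    params.update(preset)

    # generate_preset_yaml(): drop entries identical to the defaults before dumping.
    data = {k: v for k, v in params.items() if v != defaults[k]}
    print(yaml.dump(data, sort_keys=False))   # -> temperature: 0.7 / top_p: 0.9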
modules/relative_imports.py ADDED
@@ -0,0 +1,13 @@
1
+ import sys
2
+ from pathlib import Path
3
+
4
+
5
+ class RelativeImport:
6
+ def __init__(self, path):
7
+ self.import_path = Path(path)
8
+
9
+ def __enter__(self):
10
+ sys.path.insert(0, str(self.import_path))
11
+
12
+ def __exit__(self, exc_type, exc_value, traceback):
13
+ sys.path.remove(str(self.import_path))
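RelativeImport is a small context manager that temporarily prepends a directory to sys.path so code vendored under that directory can be imported with plain import statements, then removes it again on exit. A hypothetical usage sketch (the path and module name are placeholders, not files known to exist in this repo):

    from modules.relative_imports import RelativeImport

    with RelativeImport("repositories/some_vendored_repo"):
        import some_vendored_module   # resolved against the directory inserted above
    # On exit the directory is removed from sys.path, so the import has no lasting side effect.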
modules/text_generation.py ADDED
@@ -0,0 +1,337 @@
1
+ import ast
2
+ import copy
3
+ import random
4
+ import re
5
+ import time
6
+ import traceback
7
+
8
+ import numpy as np
9
+ import torch
10
+ import transformers
11
+ from transformers import LogitsProcessorList
12
+
13
+ import modules.shared as shared
14
+ from modules.callbacks import (
15
+ Iteratorize,
16
+ Stream,
17
+ _StopEverythingStoppingCriteria
18
+ )
19
+ from modules.extensions import apply_extensions
20
+ from modules.html_generator import generate_4chan_html, generate_basic_html
21
+ from modules.logging_colors import logger
22
+ from modules.models import clear_torch_cache, local_rank
23
+
24
+
25
+ def generate_reply(*args, **kwargs):
26
+ shared.generation_lock.acquire()
27
+ try:
28
+ for result in _generate_reply(*args, **kwargs):
29
+ yield result
30
+ finally:
31
+ shared.generation_lock.release()
32
+
33
+
34
+ def get_max_prompt_length(state):
35
+ return state['truncation_length'] - state['max_new_tokens']
36
+
37
+
38
+ def encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_length=None):
39
+ if shared.model.__class__.__name__ in ['LlamaCppModel', 'RWKVModel']:
40
+ input_ids = shared.tokenizer.encode(str(prompt))
41
+ input_ids = np.array(input_ids).reshape(1, len(input_ids))
42
+ return input_ids
43
+ else:
44
+ input_ids = shared.tokenizer.encode(str(prompt), return_tensors='pt', add_special_tokens=add_special_tokens)
45
+
46
+ # This is a hack for making replies more creative.
47
+ if not add_bos_token and input_ids[0][0] == shared.tokenizer.bos_token_id:
48
+ input_ids = input_ids[:, 1:]
49
+
50
+ # Handling truncation
51
+ if truncation_length is not None:
52
+ input_ids = input_ids[:, -truncation_length:]
53
+
54
+ if shared.model.__class__.__name__ in ['LlamaCppModel', 'RWKVModel', 'ExllamaModel'] or shared.args.cpu:
55
+ return input_ids
56
+ elif shared.args.deepspeed:
57
+ return input_ids.to(device=local_rank)
58
+ elif torch.backends.mps.is_available():
59
+ device = torch.device('mps')
60
+ return input_ids.to(device)
61
+ else:
62
+ return input_ids.cuda()
63
+
64
+
65
+ def get_encoded_length(prompt):
66
+ length_after_extensions = apply_extensions('tokenized_length', prompt)
67
+ if length_after_extensions is not None:
68
+ return length_after_extensions
69
+
70
+ return len(encode(prompt)[0])
71
+
72
+
73
+ def decode(output_ids, skip_special_tokens=True):
74
+ return shared.tokenizer.decode(output_ids, skip_special_tokens)
75
+
76
+
77
+ # Removes empty replies from gpt4chan outputs
78
+ def fix_gpt4chan(s):
79
+ for i in range(10):
80
+ s = re.sub("--- [0-9]*\n>>[0-9]*\n---", "---", s)
81
+ s = re.sub("--- [0-9]*\n *\n---", "---", s)
82
+ s = re.sub("--- [0-9]*\n\n\n---", "---", s)
83
+
84
+ return s
85
+
86
+
87
+ # Fix the LaTeX equations in galactica
88
+ def fix_galactica(s):
89
+ s = s.replace(r'\[', r'$')
90
+ s = s.replace(r'\]', r'$')
91
+ s = s.replace(r'\(', r'$')
92
+ s = s.replace(r'\)', r'$')
93
+ s = s.replace(r'$$', r'$')
94
+ s = re.sub(r'\n', r'\n\n', s)
95
+ s = re.sub(r"\n{3,}", "\n\n", s)
96
+ return s
97
+
98
+
99
+ def get_reply_from_output_ids(output_ids, input_ids, original_question, state, is_chat=False):
100
+ if shared.is_seq2seq:
101
+ reply = decode(output_ids, state['skip_special_tokens'])
102
+ else:
103
+ new_tokens = len(output_ids) - len(input_ids[0])
104
+ reply = decode(output_ids[-new_tokens:], state['skip_special_tokens'])
105
+ # Prevent LlamaTokenizer from skipping a space
106
+ if type(shared.tokenizer) in [transformers.LlamaTokenizer, transformers.LlamaTokenizerFast] and len(output_ids) > 0:
107
+ if shared.tokenizer.convert_ids_to_tokens(int(output_ids[-new_tokens])).startswith('▁'):
108
+ reply = ' ' + reply
109
+
110
+ return reply
111
+
112
+
113
+ def formatted_outputs(reply, model_name):
114
+ if any(s in model_name for s in ['gpt-4chan', 'gpt4chan']):
115
+ reply = fix_gpt4chan(reply)
116
+ return reply, generate_4chan_html(reply)
117
+ else:
118
+ return reply, generate_basic_html(reply)
119
+
120
+
121
+ def set_manual_seed(seed):
122
+ seed = int(seed)
123
+ if seed == -1:
124
+ seed = random.randint(1, 2**31)
125
+
126
+ torch.manual_seed(seed)
127
+ if torch.cuda.is_available():
128
+ torch.cuda.manual_seed_all(seed)
129
+
130
+ return seed
131
+
132
+
133
+ def stop_everything_event():
134
+ shared.stop_everything = True
135
+
136
+
137
+ def generate_reply_wrapper(question, state, stopping_strings=None):
138
+ reply = question if not shared.is_seq2seq else ''
139
+ yield formatted_outputs(reply, shared.model_name)
140
+
141
+ for reply in generate_reply(question, state, stopping_strings, is_chat=False):
142
+ if not shared.is_seq2seq:
143
+ reply = question + reply
144
+
145
+ yield formatted_outputs(reply, shared.model_name)
146
+
147
+
148
+ def apply_stopping_strings(reply, all_stop_strings):
149
+ stop_found = False
150
+ for string in all_stop_strings:
151
+ idx = reply.find(string)
152
+ if idx != -1:
153
+ reply = reply[:idx]
154
+ stop_found = True
155
+ break
156
+
157
+ if not stop_found:
158
+ # If something like "\nYo" is generated just before "\nYou:"
159
+ # is completed, trim it
160
+ for string in all_stop_strings:
161
+ for j in range(len(string) - 1, 0, -1):
162
+ if reply[-j:] == string[:j]:
163
+ reply = reply[:-j]
164
+ break
165
+ else:
166
+ continue
167
+
168
+ break
169
+
170
+ return reply, stop_found
171
+
172
+
173
+ def _generate_reply(question, state, stopping_strings=None, is_chat=False):
174
+ generate_func = apply_extensions('custom_generate_reply')
175
+ if generate_func is None:
176
+ if shared.model_name == 'None' or shared.model is None:
177
+ logger.error("No model is loaded! Select one in the Model tab.")
178
+ yield ''
179
+ return
180
+
181
+ if shared.model.__class__.__name__ in ['LlamaCppModel', 'RWKVModel', 'ExllamaModel']:
182
+ generate_func = generate_reply_custom
183
+ else:
184
+ generate_func = generate_reply_HF
185
+
186
+ # Preparing the input
187
+ original_question = question
188
+ if not is_chat:
189
+ state = apply_extensions('state', state)
190
+ question = apply_extensions('input', question, state)
191
+
192
+ # Finding the stopping strings
193
+ all_stop_strings = []
194
+ for st in (stopping_strings, ast.literal_eval(f"[{state['custom_stopping_strings']}]")):
195
+ if type(st) is list and len(st) > 0:
196
+ all_stop_strings += st
197
+
198
+ if shared.args.verbose:
199
+ print(f'\n\n{question}\n--------------------\n')
200
+
201
+ shared.stop_everything = False
202
+ clear_torch_cache()
203
+ seed = set_manual_seed(state['seed'])
204
+ last_update = -1
205
+ reply = ''
206
+ is_stream = state['stream']
207
+ if len(all_stop_strings) > 0 and not state['stream']:
208
+ state = copy.deepcopy(state)
209
+ state['stream'] = True
210
+
211
+ for reply in generate_func(question, original_question, seed, state, stopping_strings, is_chat=is_chat):
212
+ reply, stop_found = apply_stopping_strings(reply, all_stop_strings)
213
+ if is_stream:
214
+ cur_time = time.time()
215
+ if cur_time - last_update > 0.041666666666666664: # Limit streaming to 24 fps
216
+ last_update = cur_time
217
+ yield reply
218
+
219
+ if stop_found:
220
+ break
221
+
222
+ if not is_chat:
223
+ reply = apply_extensions('output', reply, state)
224
+
225
+ yield reply
226
+
227
+
228
+ def generate_reply_HF(question, original_question, seed, state, stopping_strings=None, is_chat=False):
229
+ generate_params = {}
230
+ for k in ['max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'repetition_penalty_range', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping', 'tfs', 'top_a', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta']:
231
+ generate_params[k] = state[k]
232
+
233
+ for k in ['epsilon_cutoff', 'eta_cutoff']:
234
+ if state[k] > 0:
235
+ generate_params[k] = state[k] * 1e-4
236
+
237
+ if state['ban_eos_token']:
238
+ generate_params['suppress_tokens'] = [shared.tokenizer.eos_token_id]
239
+
240
+ if shared.args.no_cache:
241
+ generate_params.update({'use_cache': False})
242
+
243
+ if shared.args.deepspeed:
244
+ generate_params.update({'synced_gpus': True})
245
+
246
+ # Encode the input
247
+ input_ids = encode(question, add_bos_token=state['add_bos_token'], truncation_length=get_max_prompt_length(state))
248
+ output = input_ids[0]
249
+ cuda = not any((shared.args.cpu, shared.args.deepspeed))
250
+
251
+ # Add the encoded tokens to generate_params
252
+ question, input_ids, inputs_embeds = apply_extensions('tokenizer', state, question, input_ids, None)
253
+ original_input_ids = input_ids
254
+ generate_params.update({'inputs': input_ids})
255
+ if inputs_embeds is not None:
256
+ generate_params.update({'inputs_embeds': inputs_embeds})
257
+
258
+ # Stopping criteria / eos token
259
+ eos_token_ids = [shared.tokenizer.eos_token_id] if shared.tokenizer.eos_token_id is not None else []
260
+ generate_params['eos_token_id'] = eos_token_ids
261
+ generate_params['stopping_criteria'] = transformers.StoppingCriteriaList()
262
+ generate_params['stopping_criteria'].append(_StopEverythingStoppingCriteria())
263
+
264
+ processor = state.get('logits_processor', LogitsProcessorList([]))
265
+ # In case folks just pass in a processor by itself.
266
+ if type(processor) != LogitsProcessorList:
267
+ processor = LogitsProcessorList([processor])
268
+ apply_extensions('logits_processor', processor, input_ids)
269
+ generate_params['logits_processor'] = processor
270
+
271
+ t0 = time.time()
272
+ try:
273
+ if not is_chat and not shared.is_seq2seq:
274
+ yield ''
275
+
276
+ # Generate the entire reply at once.
277
+ if not state['stream']:
278
+ with torch.no_grad():
279
+ output = shared.model.generate(**generate_params)[0]
280
+ if cuda:
281
+ output = output.cuda()
282
+
283
+ yield get_reply_from_output_ids(output, input_ids, original_question, state, is_chat=is_chat)
284
+
285
+ # Stream the reply 1 token at a time.
286
+ # This is based on the trick of using 'stopping_criteria' to create an iterator.
287
+ else:
288
+
289
+ def generate_with_callback(callback=None, *args, **kwargs):
290
+ kwargs['stopping_criteria'].append(Stream(callback_func=callback))
291
+ clear_torch_cache()
292
+ with torch.no_grad():
293
+ shared.model.generate(**kwargs)
294
+
295
+ def generate_with_streaming(**kwargs):
296
+ return Iteratorize(generate_with_callback, [], kwargs, callback=None)
297
+
298
+ with generate_with_streaming(**generate_params) as generator:
299
+ for output in generator:
300
+ yield get_reply_from_output_ids(output, input_ids, original_question, state, is_chat=is_chat)
301
+ if output[-1] in eos_token_ids:
302
+ break
303
+
304
+ except Exception:
305
+ traceback.print_exc()
306
+ finally:
307
+ t1 = time.time()
308
+ original_tokens = len(original_input_ids[0])
309
+ new_tokens = len(output) - (original_tokens if not shared.is_seq2seq else 0)
310
+ print(f'Output generated in {(t1-t0):.2f} seconds ({new_tokens/(t1-t0):.2f} tokens/s, {new_tokens} tokens, context {original_tokens}, seed {seed})')
311
+ return
312
+
313
+
314
+ def generate_reply_custom(question, original_question, seed, state, stopping_strings=None, is_chat=False):
315
+ seed = set_manual_seed(state['seed'])
316
+
317
+ t0 = time.time()
318
+ reply = ''
319
+ try:
320
+ if not is_chat:
321
+ yield ''
322
+
323
+ if not state['stream']:
324
+ reply = shared.model.generate(question, state)
325
+ yield reply
326
+ else:
327
+ for reply in shared.model.generate_with_streaming(question, state):
328
+ yield reply
329
+
330
+ except Exception:
331
+ traceback.print_exc()
332
+ finally:
333
+ t1 = time.time()
334
+ original_tokens = len(encode(original_question)[0])
335
+ new_tokens = len(encode(original_question + reply)[0]) - original_tokens
336
+ print(f'Output generated in {(t1-t0):.2f} seconds ({new_tokens/(t1-t0):.2f} tokens/s, {new_tokens} tokens, context {original_tokens}, seed {seed})')
337
+ return
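As a quick illustration of apply_stopping_strings() defined above: it truncates the reply at the first completed stop string, and it also trims a dangling prefix of a stop string so partial matches never leak into the streamed output. The import path assumes this repo's layout; the strings are made up.

    from modules.text_generation import apply_stopping_strings

    reply, stop_found = apply_stopping_strings("I am fine.\nYou: and you?", ["\nYou:"])
    # reply == "I am fine." and stop_found == True: everything from the stop string onward is cut.

    reply, stop_found = apply_stopping_strings("I am fine.\nYo", ["\nYou:"])
    # reply == "I am fine." and stop_found == False: the dangling "\nYo" prefix is trimmed,
    # but generation is allowed to continue.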
modules/ui.py ADDED
@@ -0,0 +1,206 @@
1
+ import json
2
+ from pathlib import Path
3
+
4
+ import gradio as gr
5
+ import torch
6
+
7
+ from modules import shared
8
+
9
+
10
+ with open(Path(__file__).resolve().parent / '../css/main.css', 'r') as f:
11
+ css = f.read()
12
+ with open(Path(__file__).resolve().parent / '../css/chat.css', 'r') as f:
13
+ chat_css = f.read()
14
+ with open(Path(__file__).resolve().parent / '../css/main.js', 'r') as f:
15
+ main_js = f.read()
16
+ with open(Path(__file__).resolve().parent / '../css/chat.js', 'r') as f:
17
+ chat_js = f.read()
18
+
19
+ refresh_symbol = '🔄'
20
+ delete_symbol = '🗑️'
21
+ save_symbol = '💾'
22
+
23
+ theme = gr.themes.Default(
24
+ font=['Helvetica', 'ui-sans-serif', 'system-ui', 'sans-serif'],
25
+ font_mono=['IBM Plex Mono', 'ui-monospace', 'Consolas', 'monospace'],
26
+ ).set(
27
+ border_color_primary='#c5c5d2',
28
+ button_large_padding='6px 12px',
29
+ body_text_color_subdued='#484848',
30
+ background_fill_secondary='#eaeaea'
31
+ )
32
+
33
+
34
+ def list_model_elements():
35
+ elements = [
36
+ 'loader',
37
+ 'cpu_memory',
38
+ 'auto_devices',
39
+ 'disk',
40
+ 'cpu',
41
+ 'bf16',
42
+ 'load_in_8bit',
43
+ 'trust_remote_code',
44
+ 'load_in_4bit',
45
+ 'compute_dtype',
46
+ 'quant_type',
47
+ 'use_double_quant',
48
+ 'wbits',
49
+ 'groupsize',
50
+ 'model_type',
51
+ 'pre_layer',
52
+ 'triton',
53
+ 'desc_act',
54
+ 'no_inject_fused_attention',
55
+ 'no_inject_fused_mlp',
56
+ 'no_use_cuda_fp16',
57
+ 'threads',
58
+ 'n_batch',
59
+ 'no_mmap',
60
+ 'low_vram',
61
+ 'mlock',
62
+ 'n_gpu_layers',
63
+ 'n_ctx',
64
+ 'n_gqa',
65
+ 'rms_norm_eps',
66
+ 'llama_cpp_seed',
67
+ 'gpu_split',
68
+ 'max_seq_len',
69
+ 'compress_pos_emb',
70
+ 'alpha_value'
71
+ ]
72
+
73
+ for i in range(torch.cuda.device_count()):
74
+ elements.append(f'gpu_memory_{i}')
75
+
76
+ return elements
77
+
78
+
79
+ def list_interface_input_elements():
80
+ elements = [
81
+ 'max_new_tokens',
82
+ 'seed',
83
+ 'temperature',
84
+ 'top_p',
85
+ 'top_k',
86
+ 'typical_p',
87
+ 'epsilon_cutoff',
88
+ 'eta_cutoff',
89
+ 'repetition_penalty',
90
+ 'repetition_penalty_range',
91
+ 'encoder_repetition_penalty',
92
+ 'no_repeat_ngram_size',
93
+ 'min_length',
94
+ 'do_sample',
95
+ 'penalty_alpha',
96
+ 'num_beams',
97
+ 'length_penalty',
98
+ 'early_stopping',
99
+ 'mirostat_mode',
100
+ 'mirostat_tau',
101
+ 'mirostat_eta',
102
+ 'add_bos_token',
103
+ 'ban_eos_token',
104
+ 'truncation_length',
105
+ 'custom_stopping_strings',
106
+ 'skip_special_tokens',
107
+ 'stream',
108
+ 'tfs',
109
+ 'top_a',
110
+ ]
111
+
112
+ if shared.args.chat:
113
+ elements += [
114
+ 'character_menu',
115
+ 'history',
116
+ 'name1',
117
+ 'name2',
118
+ 'greeting',
119
+ 'context',
120
+ 'chat_generation_attempts',
121
+ 'stop_at_newline',
122
+ 'mode',
123
+ 'instruction_template',
124
+ 'name1_instruct',
125
+ 'name2_instruct',
126
+ 'context_instruct',
127
+ 'turn_template',
128
+ 'chat_style',
129
+ 'chat-instruct_command',
130
+ ]
131
+ else:
132
+ elements.append('textbox')
133
+ if not shared.args.notebook:
134
+ elements.append('output_textbox')
135
+
136
+ elements += list_model_elements()
137
+ return elements
138
+
139
+
140
+ def gather_interface_values(*args):
141
+ output = {}
142
+ for i, element in enumerate(list_interface_input_elements()):
143
+ output[element] = args[i]
144
+
145
+ if not shared.args.multi_user:
146
+ shared.persistent_interface_state = output
147
+ Path('logs').mkdir(exist_ok=True)
148
+ with open(Path(f'logs/session_{shared.get_mode()}_autosave.json'), 'w') as f:
149
+ f.write(json.dumps(output, indent=4))
150
+
151
+ return output
152
+
153
+
154
+ def apply_interface_values(state, use_persistent=False):
155
+ if use_persistent:
156
+ state = shared.persistent_interface_state
157
+
158
+ elements = list_interface_input_elements()
159
+ if len(state) == 0:
160
+ return [gr.update() for k in elements] # Dummy, do nothing
161
+ else:
162
+ return [state[k] if k in state else gr.update() for k in elements]
163
+
164
+
165
+ class ToolButton(gr.Button, gr.components.IOComponent):
166
+ """
167
+ Small button with single emoji as text, fits inside gradio forms
168
+ Copied from https://github.com/AUTOMATIC1111/stable-diffusion-webui
169
+ """
170
+
171
+ def __init__(self, **kwargs):
172
+ super().__init__(**kwargs)
173
+
174
+ def get_block_name(self):
175
+ return "button"
176
+
177
+
178
+ def create_refresh_button(refresh_component, refresh_method, refreshed_args, elem_class):
179
+ """
180
+ Copied from https://github.com/AUTOMATIC1111/stable-diffusion-webui
181
+ """
182
+ def refresh():
183
+ refresh_method()
184
+ args = refreshed_args() if callable(refreshed_args) else refreshed_args
185
+
186
+ for k, v in args.items():
187
+ setattr(refresh_component, k, v)
188
+
189
+ return gr.update(**(args or {}))
190
+
191
+ refresh_button = ToolButton(value=refresh_symbol, elem_classes=elem_class)
192
+ refresh_button.click(
193
+ fn=refresh,
194
+ inputs=[],
195
+ outputs=[refresh_component]
196
+ )
197
+
198
+ return refresh_button
199
+
200
+
201
+ def create_delete_button(**kwargs):
202
+ return ToolButton(value=delete_symbol, **kwargs)
203
+
204
+
205
+ def create_save_button(**kwargs):
206
+ return ToolButton(value=save_symbol, **kwargs)
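A hypothetical sketch of how create_refresh_button() above is typically wired to a dropdown inside a Blocks layout. The component names are illustrative, and it assumes it runs inside the web UI's own process (so utils.get_available_models() can read --model-dir); only the helper itself comes from the code above.

    import gradio as gr
    from modules import ui, utils

    with gr.Blocks():
        model_menu = gr.Dropdown(choices=utils.get_available_models(), label='Model')
        ui.create_refresh_button(
            model_menu,                                          # component to refresh
            lambda: None,                                        # refresh_method: nothing extra to reload here
            lambda: {'choices': utils.get_available_models()},   # refreshed_args: new kwargs for the dropdown
            'refresh-button'                                     # elem_class used for styling the button
        )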
modules/utils.py ADDED
@@ -0,0 +1,127 @@
1
+ import os
2
+ import re
3
+ from datetime import datetime
4
+ from pathlib import Path
5
+
6
+ from modules import shared
7
+ from modules.logging_colors import logger
8
+
9
+
10
+ # Helper function to get multiple values from shared.gradio
11
+ def gradio(*keys):
12
+ if len(keys) == 1 and type(keys[0]) is list:
13
+ keys = keys[0]
14
+
15
+ return [shared.gradio[k] for k in keys]
16
+
17
+
18
+ def save_file(fname, contents):
19
+ if fname == '':
20
+ logger.error('File name is empty!')
21
+ return
22
+
23
+ root_folder = Path(__file__).resolve().parent.parent
24
+ abs_path = Path(fname).resolve()
25
+ rel_path = abs_path.relative_to(root_folder)
26
+ if rel_path.parts[0] == '..':
27
+ logger.error(f'Invalid file path: {fname}')
28
+ return
29
+
30
+ with open(abs_path, 'w', encoding='utf-8') as f:
31
+ f.write(contents)
32
+
33
+ logger.info(f'Saved {abs_path}.')
34
+
35
+
36
+ def delete_file(fname):
37
+ if fname == '':
38
+ logger.error('File name is empty!')
39
+ return
40
+
41
+ root_folder = Path(__file__).resolve().parent.parent
42
+ abs_path = Path(fname).resolve()
43
+ rel_path = abs_path.relative_to(root_folder)
44
+ if rel_path.parts[0] == '..':
45
+ logger.error(f'Invalid file path: {fname}')
46
+ return
47
+
48
+ if abs_path.exists():
49
+ abs_path.unlink()
50
+ logger.info(f'Deleted {fname}.')
51
+
52
+
53
+ def current_time():
54
+ return f"{datetime.now().strftime('%Y-%m-%d-%H%M%S')}"
55
+
56
+
57
+ def atoi(text):
58
+ return int(text) if text.isdigit() else text.lower()
59
+
60
+
61
+ # Replace multiple string pairs in a string
62
+ def replace_all(text, dic):
63
+ for i, j in dic.items():
64
+ text = text.replace(i, j)
65
+
66
+ return text
67
+
68
+
69
+ def natural_keys(text):
70
+ return [atoi(c) for c in re.split(r'(\d+)', text)]
71
+
72
+
73
+ def get_available_models():
74
+ return sorted([re.sub('.pth$', '', item.name) for item in list(Path(f'{shared.args.model_dir}/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json', '.yaml'))], key=natural_keys)
75
+
76
+
77
+ def get_available_presets():
78
+ return sorted(set((k.stem for k in Path('presets').glob('*.yaml'))), key=natural_keys)
79
+
80
+
81
+ def get_available_prompts():
82
+ prompts = []
83
+ files = set((k.stem for k in Path('prompts').glob('*.txt')))
84
+ prompts += sorted([k for k in files if re.match('^[0-9]', k)], key=natural_keys, reverse=True)
85
+ prompts += sorted([k for k in files if re.match('^[^0-9]', k)], key=natural_keys)
86
+ prompts += ['Instruct-' + k for k in get_available_instruction_templates() if k != 'None']
87
+ prompts += ['None']
88
+ return prompts
89
+
90
+
91
+ def get_available_characters():
92
+ paths = (x for x in Path('characters').iterdir() if x.suffix in ('.json', '.yaml', '.yml'))
93
+ return ['None'] + sorted(set((k.stem for k in paths if k.stem != "instruction-following")), key=natural_keys)
94
+
95
+
96
+ def get_available_instruction_templates():
97
+ path = "characters/instruction-following"
98
+ paths = []
99
+ if os.path.exists(path):
100
+ paths = (x for x in Path(path).iterdir() if x.suffix in ('.json', '.yaml', '.yml'))
101
+
102
+ return ['None'] + sorted(set((k.stem for k in paths)), key=natural_keys)
103
+
104
+
105
+ def get_available_extensions():
106
+ return sorted(set(map(lambda x: x.parts[1], Path('extensions').glob('*/script.py'))), key=natural_keys)
107
+
108
+
109
+ def get_available_loras():
110
+ return sorted([item.name for item in list(Path(shared.args.lora_dir).glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=natural_keys)
111
+
112
+
113
+ def get_datasets(path: str, ext: str):
114
+ # include subdirectories for raw txt files to allow training from a subdirectory of txt files
115
+ if ext == "txt":
116
+ return ['None'] + sorted(set([k.stem for k in list(Path(path).glob('*.txt')) + list(Path(path).glob('*/')) if k.stem != 'put-trainer-datasets-here']), key=natural_keys)
117
+
118
+ return ['None'] + sorted(set([k.stem for k in Path(path).glob(f'*.{ext}') if k.stem != 'put-trainer-datasets-here']), key=natural_keys)
119
+
120
+
121
+ def get_available_chat_styles():
122
+ return sorted(set(('-'.join(k.stem.split('-')[1:]) for k in Path('css').glob('chat_style*.css'))), key=natural_keys)
123
+
124
+
125
+ def get_available_sessions():
126
+ items = sorted(set(k.stem for k in Path('logs').glob(f'session_{shared.get_mode()}*')), key=natural_keys, reverse=True)
127
+ return [item for item in items if 'autosave' in item] + [item for item in items if 'autosave' not in item]
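Most of the listing helpers above sort with natural_keys(), which splits names into text and integer chunks so numeric parts compare numerically rather than character by character. A small illustration with made-up model names:

    from modules.utils import natural_keys

    names = ['model-10b', 'model-2b', 'model-7b']
    print(sorted(names))                    # ['model-10b', 'model-2b', 'model-7b']  (plain lexicographic order)
    print(sorted(names, key=natural_keys))  # ['model-2b', 'model-7b', 'model-10b']  (numeric-aware order)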