Spaces:

featherless-ai
/

klimbr-demo

Runtime error

File size: 9,445 Bytes

from openai import OpenAI
import gradio as gr
import os
import json
import html
import random
import datetime

# The OpenAI client is pointed at the Featherless API base URL; the key is
# taken from the FEATHERLESS_API_KEY environment variable (None when unset).
api_key = os.environ.get('FEATHERLESS_API_KEY')
client = OpenAI(api_key=api_key, base_url="https://api.featherless.ai/v1")

# from https://github.com/av/klmbr/blob/ca2967123d171fc6d91c329c40e5050a86088446/klmbr/main.py
# I sure wish I could import this, but can't figure out how to make HF spaces run this as a module
# and not a file.
import random

# Names of the per-character modifications klimbr_randomize chooses from.
mods = [
    "capitalize",
    "diacritic",
    'leetspeak',
    "remove_vowel",
]

# Combining marks appended to a letter by the "diacritic" modification.
_DIACRITICS = ["̀", "́", "̂", "̃", "̈", "̄", "̆", "̇", "̊", "̋"]

# Digit look-alikes used by the "leetspeak" modification (keyed by lowercase).
_LEETSPEAK_MAP = {
    "a": "4", "e": "3", "i": "1", "o": "0", "s": "5",
    "t": "7", "b": "8", "g": "9", "l": "1",
}


def _word_spans(text):
    """Map each character index of *text* to the span of the word holding it.

    Returns a list as long as *text*. Whitespace positions hold ``None``;
    every other position holds the ``(start, end)`` slice bounds of the
    whitespace-delimited word that contains it.
    """
    spans = [None] * len(text)
    start = None
    for idx, ch in enumerate(text):
        if ch.isspace():
            if start is not None:
                spans[start:idx] = [(start, idx)] * (idx - start)
                start = None
        elif start is None:
            start = idx
    if start is not None:
        end = len(text)
        spans[start:end] = [(start, end)] * (end - start)
    return spans


def klimbr_randomize(text, percentage):
    """Randomly perturb a share of the characters in *text*.

    Each sampled character receives one modification picked at random from
    ``mods`` (swap case, append a combining diacritic, leetspeak digit, or
    vowel removal). Whitespace is never altered.

    Args:
        text: The input string.
        percentage: Share of characters to sample, on a 0-100 scale. For any
            value above 0 at least one character is sampled; exactly 0 leaves
            the text untouched.

    Returns:
        A ``(modified_text, word_mapping)`` tuple. ``word_mapping`` maps each
        modified form of a word (as it looked right after a modification) to
        the original word, both with trailing ``.`` / ``,`` stripped. Empty
        input yields ``("", {})``.

    Raises:
        ValueError: If *percentage* is outside the 0-100 range.
    """
    if not text:
        return "", {}  # Return empty string and empty mapping if input is empty

    if not 0 <= percentage <= 100:
        raise ValueError("Percentage must be between 0 and 100")

    if percentage == 0:
        # 0% means "no randomization"; without this guard the max(1, ...)
        # below would still touch one character.
        return text, {}

    chars = list(text)
    # Precomputed once from the real character positions, so runs of spaces,
    # newlines, tabs and leading whitespace cannot shift the word offsets.
    spans = _word_spans(text)
    num_chars_to_modify = max(1, int(len(chars) * (percentage / 100)))
    indices_to_modify = random.sample(range(len(chars)), num_chars_to_modify)
    word_mapping = {}

    for idx in indices_to_modify:
        modification = random.choice(mods)

        span = spans[idx]
        if span is None:
            # Whitespace between words: nothing to modify or record.
            continue
        word_start, word_end = span
        original_word = text[word_start:word_end]

        # Each index is sampled at most once, so this is still the original
        # character at that position.
        char = chars[idx]
        if modification == "capitalize":
            chars[idx] = char.swapcase()
        elif modification == "diacritic":
            if char.isalpha():
                chars[idx] = char + random.choice(_DIACRITICS)
        elif modification == "leetspeak":
            chars[idx] = _LEETSPEAK_MAP.get(char.lower(), char)
        elif modification == "remove_vowel":
            if char.lower() in "aeiou":
                chars[idx] = ""

        modified_word = "".join(chars[word_start:word_end])

        if modified_word != original_word:
            # Clean up both the modified word and the original word
            word_mapping[modified_word.rstrip('.,')] = original_word.rstrip('.,')

    return "".join(chars), word_mapping
## end of klimbr inclusion

# Randomized text keyed by (message, percentage, extra); never evicted.
klimbr_cache = {}
def memoized_klimbr(message, percentage, extra, last=False):
    """Return the randomized form of *message*, reusing earlier results.

    Args:
        message: Text to randomize.
        percentage: Forwarded to ``klimbr_randomize``.
        extra: Additional cache-key component (callers in this file pass the
            turn index).
        last: When true, any cached result for this key is discarded first so
            the message is randomized afresh.

    Returns:
        The randomized text (first element of ``klimbr_randomize``'s result).
    """
    cache_key = (message, percentage, extra)

    # _always_ re-randomize the last message
    if last:
        klimbr_cache.pop(cache_key, None)

    if cache_key in klimbr_cache:
        return klimbr_cache[cache_key]

    randomized = klimbr_randomize(message, percentage)[0]
    klimbr_cache[cache_key] = randomized
    return randomized

# Default for klimberize_conversation's `log` flag (print each original /
# randomized message pair). Environment values are always strings, so the flag
# is parsed explicitly: unset -> enabled; "", "0", "false", "no" or "off"
# (any case) -> disabled; anything else -> enabled.
LOG_TRANSLATIONS = os.environ.get('LOG_TRANSLATIONS', 'true').strip().lower() not in (
    '', '0', 'false', 'no', 'off',
)

def klimberize_conversation(message, history, percentage, log=LOG_TRANSLATIONS):
    """Randomize the user side of a conversation.

    Results are memoized per (text, percentage, turn index) so that earlier
    turns are not _re_-randomized every time the gradio chat interface hands
    the whole history back; only the newest message is always randomized
    afresh.

    Args:
        message: The newest user message.
        history: Sequence of ``(user, assistant)`` pairs for earlier turns.
        percentage: Forwarded to ``memoized_klimbr``.
        log: When truthy, print every original/randomized user message pair.

    Returns:
        ``(klimbred_message, klimbred_history)`` where the history keeps the
        assistant replies unchanged.
    """
    klimbred_history = []
    for turn_index, (user_text, assistant_text) in enumerate(history):
        randomized_user_text = memoized_klimbr(user_text, percentage, turn_index)
        klimbred_history.append((randomized_user_text, assistant_text))

    klimbred_message = memoized_klimbr(message, percentage, len(history), last=True)

    if log:
        originals = [user_text for user_text, _ in history] + [message]
        translated = [user_text for user_text, _ in klimbred_history] + [klimbred_message]
        for original, kbed in zip(originals, translated):
            print(f"Translated '{original}' as '{kbed}'")

    return (klimbred_message, klimbred_history)

def respond(message, history, model, klimbr_percentage):
    """Stream a chat completion for the klimbr-randomized conversation.

    The user messages (history and the new message) are first passed through
    ``klimberize_conversation``; the result is sent to the chat completions
    endpoint with streaming enabled.

    Args:
        message: The newest user message.
        history: Sequence of ``(user, assistant)`` pairs for earlier turns.
        model: Model identifier passed to the completions call.
        klimbr_percentage: Randomization amount forwarded to
            ``klimberize_conversation``.

    Yields:
        The HTML-escaped response accumulated so far, once per content chunk.
    """
    message, history = klimberize_conversation(message, history, klimbr_percentage)

    history_openai_format = []
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model=model,
        messages=history_openai_format,
        temperature=1.0,
        stream=True,
        max_tokens=2000,
        extra_headers={
            'HTTP-Referer': 'https://huggingface.co/spaces/featherless-ai/klimbr-demo',
            'X-Title': "Klimbr demo space"
        }
    )

    partial_message = ""
    for chunk in response:
        # A streamed chunk can arrive with an empty `choices` list (e.g. a
        # usage-only chunk); indexing [0] on it would raise IndexError.
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content
        if content is not None:
            partial_message += html.escape(content)
            yield partial_message

# Contents of logo.svg, embedded verbatim in the footer HTML. Read inside a
# context manager so the file handle is closed straight away.
with open('./logo.svg') as logo_file:
    logo = logo_file.read()

# we chose a few models across the smaller model classes to give a sense of the technique
# Maps a model-class label to the model ids offered for that class.
MODEL_CHOICES = {
    "llama2-13b-4k": [
        "NousResearch/Nous-Hermes-Llama2-13b",
    ],
    "llama3-8b-8k": [
        "meta-llama/Meta-Llama-3-8B-Instruct",
        "NousResearch/Hermes-2-Theta-Llama-3-8B",
        "aaditya/Llama3-OpenBioLLM-8B",
        "elyza/Llama-3-ELYZA-JP-8B",
        "mlabonne/NeuralDaredevil-8B-abliterated",
    ],
    "llama31-8b-16k": [
        "meta-llama/Meta-Llama-3.1-8B-Instruct",
        "NousResearch/Hermes-3-Llama-3.1-8B",
        "shenzhi-wang/Llama3.1-8B-Chinese-Chat",
        "AXCXEPT/Llama-3.1-8B-EZO-1.1-it",
        "mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated",
        "VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct",
    ],
    "mistral-v02-7b-lc": [
        "HuggingFaceH4/zephyr-7b-beta",
        "mlabonne/NeuralDaredevil-7B",
        "HuggingFaceH4/zephyr-7b-alpha",
    ],
    "mistral-nemo-12b-lc": [
        "mistralai/Mistral-Nemo-Instruct-2407",
    ],
    # NOTE(review): "rwvk" may be a typo for "rwkv" (cf. the model id below);
    # the key is shown in the dropdown label - confirm before changing.
    "rwvk-14b-lc": [
        "m8than/apple-rwkv-1-c-14b",
    ],
}


def build_model_choices():
    """Flatten MODEL_CHOICES into ``(label, model_id)`` pairs.

    The label has the form ``"<model_id> (<model class>)"``; pairs follow the
    dictionary's insertion order, then the order within each class list.
    """
    return [
        (f"{model_id} ({class_name})", model_id)
        for class_name, model_ids in MODEL_CHOICES.items()
        for model_id in model_ids
    ]


# Flat (label, value) list used as the dropdown's choices.
model_choices = build_model_choices()

def initial_model(referer=None):
    """Return the model id the selector starts on.

    ``referer`` is accepted but currently ignored; the result is a fixed id.
    """
    # A disabled alternative picked a random-but-stable model per day by
    # seeding random.Random with the RANDOM_SEED env value plus today's date
    # and choosing from model_choices.
    default_model_id = "mistralai/Mistral-Nemo-Instruct-2407"
    return default_model_id

# Title text for the demo; handed to gr.Blocks when the UI is built.
title_text="Klimbr token input pre-processor demo space"
# Upstream klmbr repository, linked from the intro HTML.
klimbr_url="https://github.com/av/klmbr"
# Custom stylesheet passed to gr.Blocks via `css=`.
# NOTE(review): the `.h1` rule is a class selector, and none of the markup
# built in this file carries class "h1" - possibly the `h1` element selector
# was intended; confirm before changing.
css = """
.logo-mark { fill: #ffe184; }

/* from https://github.com/gradio-app/gradio/issues/4001
 * necessary as putting ChatInterface in gr.Blocks changes behaviour
 */

.contain { display: flex; flex-direction: column; }
.gradio-container { height: 100vh !important; }
#component-0 { height: 100%; }
#chatbot { flex-grow: 1; overflow: auto;}
.lead-text {
  display: flex;
  flex-direction: column;
  align-items: center;
  justify-content: center;
  padding: 20px;
  box-sizing: border-box;
}

.content { 
    max-width: 60vh;
    text-align: center;
    font-size: 15pt;
}

.h1 {
    margin-bottom: 20px;
}
"""
# Build the page: intro text, randomization slider, model picker, chat
# interface and footer. The title is passed by keyword because the first
# positional parameter of gr.Blocks is not the title.
with gr.Blocks(title=title_text, css=css) as demo:
    gr.HTML(f"""
        <div class="lead-text">
            <h1 align="center"><a href="{klimbr_url}">Klimbr</a> demo space</h1>
            <div class="content">
                <p>
                    Klimbr is a technique to increase entropy in LLM outputs
                    by adding entropy to the input prompt prior to inference.
                </p>
                <p>
                    For details on the technique see <a href="{klimbr_url}">the klimbr github</a>
                    or the source code of this space.
                </p>
            </div>
        </div>
    """)

    # hidden_state = gr.State(value=initial_model)
    # klimbr_randomize validates and scales its percentage on a 0-100 range,
    # so the slider uses the same scale (a 0-1 slider could never request
    # more than 1% of the characters).
    percentage = gr.Slider(
        minimum=0,
        maximum=100,
        value=65,
        label="Percentage of input text to randomize"
    )

    with gr.Row():
        model_selector = gr.Dropdown(
            label="Select your Model",
            choices=model_choices,
            value=initial_model,
            # value=hidden_state,
            scale=4
        )
        # Client-side only (fn=None): opens the selected model's page on
        # huggingface.co in a new tab.
        gr.Button(
            value="Visit Model Card ↗️",
            scale=1
        ).click(
            inputs=[model_selector],
            js="(model_selection) => { window.open(`https://huggingface.co/${model_selection}`, '_blank') }",
            fn=None,
        )

    # `respond` receives the selected model and slider value as its extra
    # arguments, in this order.
    gr.ChatInterface(
        respond,
        additional_inputs=[model_selector, percentage],
        head="""
        <script>console.log("Hello from gradio!")</script>
        """,
        concurrency_limit=5
    )
    gr.HTML(f"""
        <p align="center">
            Inference by <a href="https://featherless.ai">{logo}</a>
        </p>
    """)
    def update_initial_model_choice(request: gr.Request):
        """Pick the model to preselect, given the request's referer header."""
        return initial_model(request.headers.get('referer'))

    # Set the dropdown's value when the page loads.
    demo.load(update_initial_model_choice, outputs=model_selector)

demo.launch()