# Spaces status banner captured alongside this file: "Runtime error"
# (page residue, not part of the program).
from openai import OpenAI | |
import gradio as gr | |
import os | |
import json | |
import html | |
import random | |
import datetime | |
# The OpenAI client is pointed at the Featherless API base URL. The key is read
# from the FEATHERLESS_API_KEY environment variable and is None when unset.
api_key = os.environ.get('FEATHERLESS_API_KEY')
client = OpenAI(
    base_url="https://api.featherless.ai/v1",
    api_key=api_key
)
# from https://github.com/av/klmbr/blob/ca2967123d171fc6d91c329c40e5050a86088446/klmbr/main.py
# I sure wish I could import this, but I can't figure out how to make HF spaces run this as a module
# and not a file.
import random | |
# Names of the per-character modifications klimbr_randomize can apply.
mods = [
    "capitalize",
    "diacritic",
    'leetspeak',
    "remove_vowel",
]

# Combining marks appended after a letter by the "diacritic" modification.
_DIACRITICS = [
    "\u0300",  # grave
    "\u0301",  # acute
    "\u0302",  # circumflex
    "\u0303",  # tilde
    "\u0308",  # diaeresis
    "\u0304",  # macron
    "\u0306",  # breve
    "\u0307",  # dot above
    "\u030a",  # ring above
    "\u030b",  # double acute
]

# Letter -> digit substitutions used by the "leetspeak" modification.
_LEETSPEAK_MAP = {
    "a": "4", "e": "3", "i": "1", "o": "0", "s": "5",
    "t": "7", "b": "8", "g": "9", "l": "1",
}


def _word_spans(text):
    """Return (start, end) index pairs of each whitespace-delimited word in text."""
    spans = []
    start = None
    for pos, char in enumerate(text):
        if char.isspace():
            if start is not None:
                spans.append((start, pos))
                start = None
        elif start is None:
            start = pos
    if start is not None:
        spans.append((start, len(text)))
    return spans


def klimbr_randomize(text, percentage):
    """Randomly perturb a share of the characters in ``text``.

    Each selected character that belongs to a word receives one randomly
    chosen modification from ``mods`` (case swap, combining diacritic,
    leetspeak digit, or vowel removal). Whitespace is never modified.

    Args:
        text: The string to perturb.
        percentage: Share of characters to select, on a 0-100 scale.
            Any value above 0 selects at least one character; 0 leaves the
            text untouched.

    Returns:
        A ``(modified_text, word_mapping)`` tuple. ``word_mapping`` maps the
        final modified spelling of each changed word to its original
        spelling, both with trailing ``.`` and ``,`` stripped.

    Raises:
        ValueError: If ``percentage`` is outside 0-100.
    """
    if not text:
        return "", {}  # Return empty string and empty mapping if input is empty
    if not 0 <= percentage <= 100:
        raise ValueError("Percentage must be between 0 and 100")
    if percentage == 0:
        # Asking for 0% must not be rounded up to "one character".
        return text, {}

    chars = list(text)

    # Map every character index to the word span containing it, using the
    # real positions in the text so that leading, repeated or non-space
    # whitespace cannot shift the lookup.
    span_at = [None] * len(chars)
    for span in _word_spans(text):
        for pos in range(span[0], span[1]):
            span_at[pos] = span

    num_chars_to_modify = max(1, int(len(chars) * (percentage / 100)))
    indices_to_modify = random.sample(range(len(chars)), num_chars_to_modify)

    touched_spans = set()
    for idx in indices_to_modify:
        modification = random.choice(mods)
        span = span_at[idx]
        if span is None:
            # Whitespace: nothing to modify.
            continue

        char = chars[idx]
        if modification == "capitalize":
            chars[idx] = char.swapcase()
        elif modification == "diacritic":
            if char.isalpha():
                chars[idx] = char + random.choice(_DIACRITICS)
        elif modification == "leetspeak":
            chars[idx] = _LEETSPEAK_MAP.get(char.lower(), char)
        elif modification == "remove_vowel":
            if char.lower() in "aeiou":
                # The slot is kept (as an empty string) so indices stay stable.
                chars[idx] = ""
        touched_spans.add(span)

    # Build the mapping once all edits are done so that only the final
    # spelling of each word is recorded, not intermediate ones.
    word_mapping = {}
    for start, end in sorted(touched_spans):
        original_word = text[start:end]
        modified_word = "".join(chars[start:end])
        if modified_word != original_word:
            word_mapping[modified_word.rstrip('.,')] = original_word.rstrip('.,')

    return "".join(chars), word_mapping
## end of klimbr inclusion | |
# Upper bound on remembered klimbrizations. The cache is module-level and
# would otherwise grow for as long as the process runs.
KLIMBR_CACHE_MAX_ENTRIES = 10000

# Maps (message, percentage, extra) -> klimbrized message text.
klimbr_cache = {}


def memoized_klimbr(message, percentage, extra, last=False):
    """Return a klimbrized copy of ``message``, reusing earlier results.

    Args:
        message: Text to klimbrize.
        percentage: Forwarded unchanged to ``klimbr_randomize``.
        extra: Additional cache-key component; it only distinguishes cache
            entries and is not otherwise used.
        last: When true, any cached result for this key is discarded first so
            the message is randomized afresh.

    Returns:
        The klimbrized text (first element of ``klimbr_randomize``'s result).
    """
    key = (message, percentage, extra)

    # _always_ re-randomize the last message
    if last:
        klimbr_cache.pop(key, None)

    if key in klimbr_cache:
        return klimbr_cache[key]

    # Evict the oldest entries (dicts keep insertion order) to stay bounded.
    overflow = len(klimbr_cache) - KLIMBR_CACHE_MAX_ENTRIES + 1
    if overflow > 0:
        for stale_key in list(klimbr_cache)[:overflow]:
            klimbr_cache.pop(stale_key, None)

    klimbred = klimbr_randomize(message, percentage)[0]
    klimbr_cache[key] = klimbred
    return klimbred
def _env_flag(name, default):
    """Read a boolean flag from the environment.

    Returns ``default`` when the variable is unset. Otherwise the value is
    false for "", "0", "false", "no" and "off" (case-insensitive) and true
    for anything else.
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    return raw.strip().lower() not in ("", "0", "false", "no", "off")


# Environment values are strings, so "false" or "0" must be parsed rather
# than relied on for truthiness.
LOG_TRANSLATIONS = _env_flag('LOG_TRANSLATIONS', True)


def klimberize_conversation(message, history, percentage, log=LOG_TRANSLATIONS):
    """Klimbrize the user side of a conversation.

    Args:
        message: The new user message.
        history: Sequence of ``(human, assistant)`` pairs from earlier turns.
        percentage: Forwarded unchanged to ``memoized_klimbr``.
        log: When true, print each original user message with its
            klimbrized form.

    Returns:
        ``(klimbred_message, klimbred_history)``; the history keeps the
        assistant replies untouched.
    """
    # we memoize the klimbrization of strings.
    # this is to work with the gradio chat interface model
    # so that messages are not _re_-randomized at each conversation turn
    klimbred_history = [
        (memoized_klimbr(human, percentage, index), assistant)
        for index, (human, assistant) in enumerate(history)
    ]
    # The newest message is keyed by its turn index and always re-randomized.
    klimbred_message = memoized_klimbr(message, percentage, len(history), last=True)

    if log:
        originals = [*(human for human, _ in history), message]
        translated = [*(human for human, _ in klimbred_history), klimbred_message]
        for original, kbed in zip(originals, translated):
            print(f"Translated '{original}' as '{kbed}'")

    return (klimbred_message, klimbred_history)
def respond(message, history, model, klimbr_percentage):
    """Stream a chat completion for a klimbrized conversation.

    Args:
        message: The new user message.
        history: Sequence of ``(human, assistant)`` pairs from earlier turns.
        model: Model id passed to the chat-completions endpoint.
        klimbr_percentage: Forwarded unchanged to ``klimberize_conversation``.

    Yields:
        The HTML-escaped reply accumulated so far, once per content chunk.
    """
    message, history = klimberize_conversation(message, history, klimbr_percentage)

    history_openai_format = []
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model=model,
        messages=history_openai_format,
        temperature=1.0,
        stream=True,
        max_tokens=2000,
        extra_headers={
            'HTTP-Referer': 'https://huggingface.co/spaces/featherless-ai/klimbr-demo',
            'X-Title': "Klimbr demo space"
        }
    )

    partial_message = ""
    for chunk in response:
        # Some stream chunks carry no choices; indexing [0] on them would
        # raise IndexError and abort the reply mid-stream.
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content
        if content is not None:
            partial_message += html.escape(content)
            yield partial_message
# Contents of ./logo.svg, interpolated into the page's "Inference by" footer.
# Read through a context manager so the file handle is closed promptly.
with open('./logo.svg', encoding='utf-8') as logo_file:
    logo = logo_file.read()
# we chose a few models across the smaller model classes to give a sense of the technique
# Keys are model-class names; values are the model ids offered for that class.
# NOTE(review): "rwvk-14b-lc" looks like a transposition of "rwkv" - confirm
# before changing, since the key is shown in the dropdown labels.
MODEL_CHOICES = {
    "llama2-13b-4k": [
        "NousResearch/Nous-Hermes-Llama2-13b",
    ],
    "llama3-8b-8k": [
        "meta-llama/Meta-Llama-3-8B-Instruct",
        "NousResearch/Hermes-2-Theta-Llama-3-8B",
        "aaditya/Llama3-OpenBioLLM-8B",
        "elyza/Llama-3-ELYZA-JP-8B",
        "mlabonne/NeuralDaredevil-8B-abliterated",
    ],
    "llama31-8b-16k": [
        "meta-llama/Meta-Llama-3.1-8B-Instruct",
        "NousResearch/Hermes-3-Llama-3.1-8B",
        "shenzhi-wang/Llama3.1-8B-Chinese-Chat",
        "AXCXEPT/Llama-3.1-8B-EZO-1.1-it",
        "mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated",
        "VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct",
    ],
    "mistral-v02-7b-lc": [
        "HuggingFaceH4/zephyr-7b-beta",
        "mlabonne/NeuralDaredevil-7B",
        "HuggingFaceH4/zephyr-7b-alpha",
    ],
    "mistral-nemo-12b-lc": [
        "mistralai/Mistral-Nemo-Instruct-2407",
    ],
    "rwvk-14b-lc": [
        "m8than/apple-rwkv-1-c-14b",
    ],
}


def build_model_choices():
    """Flatten MODEL_CHOICES into ``(label, model_id)`` pairs.

    Each label has the form ``"<model_id> (<model_class_name>)"``; pairs
    follow the dict's class order and each class's list order.
    """
    return [
        (f"{model_id} ({class_name})", model_id)
        for class_name, model_ids in MODEL_CHOICES.items()
        for model_id in model_ids
    ]


model_choices = build_model_choices()
def initial_model(referer=None):
    """Return the model id the model dropdown starts on.

    ``referer`` is accepted but not used; the result is a fixed model id.
    """
    default_model_id = "mistralai/Mistral-Nemo-Instruct-2407"
    return default_model_id

    # Disabled alternative, kept for reference:
    # let's use a random but different model each day.
    # key=os.environ.get('RANDOM_SEED', 'kcOtfNHA+e')
    # o = random.Random(f"{key}-{datetime.date.today().strftime('%Y-%m-%d')}")
    # return o.choice(model_choices)[1]
# Page title and the upstream project link used in the intro text.
title_text = "Klimbr token input pre-processor demo space"
klimbr_url = "https://github.com/av/klmbr"

# Custom stylesheet handed to gr.Blocks.
# The heading rule targets the <h1> element inside .lead-text; the earlier
# ".h1" selector matched a class that no visible markup uses.
css = """
.logo-mark { fill: #ffe184; }

/* from https://github.com/gradio-app/gradio/issues/4001
 * necessary as putting ChatInterface in gr.Blocks changes behaviour
 */
.contain { display: flex; flex-direction: column; }
.gradio-container { height: 100vh !important; }
#component-0 { height: 100%; }
#chatbot { flex-grow: 1; overflow: auto;}

.lead-text {
    display: flex;
    flex-direction: column;
    align-items: center;
    justify-content: center;
    padding: 20px;
    box-sizing: border-box;
}

.content {
    max-width: 60vh;
    text-align: center;
    font-size: 15pt;
}

.lead-text h1 {
    margin-bottom: 20px;
}
"""
# Build the page: intro text, randomization slider, model picker, chat UI and
# footer. The title is passed by keyword because the first positional
# parameter of gr.Blocks is the theme, not the title.
with gr.Blocks(title=title_text, css=css) as demo:
    gr.HTML(f"""
        <div class="lead-text">
            <h1 align="center"><a href="{klimbr_url}">Klimbr</a> demo space</h1>
            <div class="content">
                <p>
                    Klimbr is a technique to increase entropy in LLM outputs
                    by adding entropy to the input prompt prior to inference.
                </p>
                <p>
                    For details on the technique see <a href="{klimbr_url}">the klimbr github</a>
                    or the source code of this space.
                </p>
            </div>
        </div>
    """)

    # hidden_state = gr.State(value=initial_model)

    # klimbr_randomize reads its argument on a 0-100 scale (it divides by 100
    # and rejects values outside 0..100), so the slider uses the same scale.
    # A 0-1 slider would select at most 1% of the characters.
    percentage = gr.Slider(
        minimum=0,
        maximum=100,
        value=65,
        label="Percentage of input text to randomize"
    )

    with gr.Row():
        model_selector = gr.Dropdown(
            label="Select your Model",
            choices=model_choices,
            value=initial_model,
            # value=hidden_state,
            scale=4
        )
        # Client-side only: opens the selected model's Hugging Face page.
        gr.Button(
            value="Visit Model Card ↗️",
            scale=1
        ).click(
            inputs=[model_selector],
            js="(model_selection) => { window.open(`https://huggingface.co/${model_selection}`, '_blank') }",
            fn=None,
        )

    # The dropdown and slider values are passed to respond() as its `model`
    # and `klimbr_percentage` arguments, in that order.
    gr.ChatInterface(
        respond,
        additional_inputs=[model_selector, percentage],
        head="""
        <script>console.log("Hello from gradio!")</script>
        """,
        concurrency_limit=5
    )

    gr.HTML(f"""
        <p align="center">
            Inference by <a href="https://featherless.ai">{logo}</a>
        </p>
    """)

    def update_initial_model_choice(request: gr.Request):
        """Pick the dropdown's initial model, given the request's referer header."""
        return initial_model(request.headers.get('referer'))

    # Set the dropdown value each time the page loads.
    demo.load(update_initial_model_choice, outputs=model_selector)

demo.launch()