# klimbr-demo / app.py
from openai import OpenAI
import gradio as gr
import os
import json
import html
import random
import datetime
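# Featherless serves an OpenAI-compatible API, so the stock OpenAI client works
# once pointed at the Featherless base_url; the key is presumably set as a Space secret.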
api_key = os.environ.get('FEATHERLESS_API_KEY')
client = OpenAI(
base_url="https://api.featherless.ai/v1",
api_key=api_key
)
# from https://github.com/av/klmbr/blob/ca2967123d171fc6d91c329c40e5050a86088446/klmbr/main.py
# I sure wish I could import this, but I can't figure out how to make HF Spaces
# run this as a module rather than a file.
mods = [
"capitalize",
"diacritic",
    "leetspeak",
"remove_vowel",
]
def klimbr_randomize(text, percentage):
if not text:
return "", {} # Return empty string and empty mapping if input is empty
if not 0 <= percentage <= 100:
raise ValueError("Percentage must be between 0 and 100")
words = text.split()
chars = list(text)
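    # Always modify at least one character, so low percentages still perturb short prompts.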
num_chars_to_modify = max(1, int(len(chars) * (percentage / 100)))
indices_to_modify = random.sample(range(len(chars)), num_chars_to_modify)
word_mapping = {}
for idx in indices_to_modify:
modification = random.choice(mods)
# Find the word that contains the current character
current_length = 0
for word_idx, word in enumerate(words):
if current_length <= idx < current_length + len(word):
original_word = word
word_start_idx = current_length
break
current_length += len(word) + 1 # +1 for the space
        else:
            # No containing word found: idx points at whitespace between words, so skip it.
            continue
if modification == "capitalize":
chars[idx] = chars[idx].swapcase()
elif modification == "diacritic":
if chars[idx].isalpha():
diacritics = ["̀", "́", "̂", "̃", "̈", "̄", "̆", "̇", "̊", "̋"]
chars[idx] = chars[idx] + random.choice(diacritics)
elif modification == "leetspeak":
leetspeak_map = {
"a": "4", "e": "3", "i": "1", "o": "0", "s": "5",
"t": "7", "b": "8", "g": "9", "l": "1",
}
chars[idx] = leetspeak_map.get(chars[idx].lower(), chars[idx])
elif modification == "remove_vowel":
if chars[idx].lower() in "aeiou":
chars[idx] = ""
modified_word = "".join(
chars[word_start_idx : word_start_idx + len(original_word)]
)
if modified_word != original_word:
# Clean up both the modified word and the original word
cleaned_modified_word = modified_word.rstrip('.,')
cleaned_original_word = original_word.rstrip('.,')
word_mapping[cleaned_modified_word] = cleaned_original_word
modified_text = "".join(chars)
return modified_text, word_mapping
## end of klimbr inclusion
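# Illustrative example (actual output varies run to run, since klimbr samples
# indices and modifications at random):
#   klimbr_randomize("hello world", 50)
#   -> ('h3Llo w0rld', {'h3Llo': 'hello', 'w0rld': 'world'})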
klimbr_cache = {}
def memoized_klimbr(message, percentage, extra, last=False):
key = (message, percentage, extra)
# _always_ re-randomize the last message
if last and key in klimbr_cache:
klimbr_cache.pop(key)
if key not in klimbr_cache:
klimbr_cache[key] = klimbr_randomize(message, percentage)[0]
return klimbr_cache[key]
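# The cache key is (message, percentage, turn index): lookups for past turns hit
# the cache, while last=True evicts first so the newest message is re-rolled, e.g.:
#   a = memoized_klimbr("hi there", 65, 0)             # randomized and cached
#   b = memoized_klimbr("hi there", 65, 0)             # a == b (cache hit)
#   c = memoized_klimbr("hi there", 65, 0, last=True)  # freshly re-randomized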
LOG_TRANSLATIONS = os.environ.get('LOG_TRANSLATIONS', 'true').lower() in ('1', 'true', 'yes')
def klimberize_conversation(message, history, percentage, log=LOG_TRANSLATIONS):
    # We memoize the klimbr-ization of strings to fit the Gradio chat-interface
    # model, so that earlier messages are not _re_-randomized at each turn.
klimbred_history = [
(memoized_klimbr(human, percentage, index), assistant)
for index, (human, assistant) in enumerate(history)
]
klimbred_message = memoized_klimbr(message, percentage, len(history), last=True)
if log:
        originals = [u for u, _ in history] + [message]
        klimbreds = [k for k, _ in klimbred_history] + [klimbred_message]
        for original, kbed in zip(originals, klimbreds):
            print(f"Translated '{original}' as '{kbed}'")
return (klimbred_message, klimbred_history)
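# Return shape mirrors Gradio's tuple-style history (values illustrative):
#   ('h0w ar3 you?', [('hí thère', 'Hello! How can I help?')])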
def respond(message, history, model, klimbr_percentage):
history_openai_format = []
message, history = klimberize_conversation(message, history, klimbr_percentage)
for human, assistant in history:
history_openai_format.append({"role": "user", "content": human })
history_openai_format.append({"role": "assistant", "content":assistant})
history_openai_format.append({"role": "user", "content": message})
response = client.chat.completions.create(
model=model,
        messages=history_openai_format,
temperature=1.0,
stream=True,
max_tokens=2000,
extra_headers={
'HTTP-Referer': 'https://huggingface.co/spaces/featherless-ai/klimbr-demo',
'X-Title': "Klimbr demo space"
}
)
partial_message = ""
for chunk in response:
if chunk.choices[0].delta.content is not None:
content = chunk.choices[0].delta.content
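            # Escape each streamed chunk so any HTML the model emits is shown as
            # literal text in the chat window rather than interpreted by the browser.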
escaped_content = html.escape(content)
partial_message += escaped_content
yield partial_message
with open('./logo.svg') as f:
    logo = f.read()
# we chose a few models across the smaller model classes to give a sense of the technique
MODEL_CHOICES = {
"llama2-13b-4k": [
"NousResearch/Nous-Hermes-Llama2-13b",
],
"llama3-8b-8k": [
"meta-llama/Meta-Llama-3-8B-Instruct",
"NousResearch/Hermes-2-Theta-Llama-3-8B",
"aaditya/Llama3-OpenBioLLM-8B",
"elyza/Llama-3-ELYZA-JP-8B",
"mlabonne/NeuralDaredevil-8B-abliterated",
],
"llama31-8b-16k": [
"meta-llama/Meta-Llama-3.1-8B-Instruct",
"NousResearch/Hermes-3-Llama-3.1-8B",
"shenzhi-wang/Llama3.1-8B-Chinese-Chat",
"AXCXEPT/Llama-3.1-8B-EZO-1.1-it",
"mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated",
"VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct",
],
"mistral-v02-7b-lc": [
"HuggingFaceH4/zephyr-7b-beta",
"mlabonne/NeuralDaredevil-7B",
"HuggingFaceH4/zephyr-7b-alpha",
],
"mistral-nemo-12b-lc": [
"mistralai/Mistral-Nemo-Instruct-2407",
],
"rwvk-14b-lc": [
"m8than/apple-rwkv-1-c-14b",
],
}
def build_model_choices():
all_choices = []
for model_class_name in MODEL_CHOICES:
model_class = MODEL_CHOICES[model_class_name]
all_choices += [ (f"{model_id} ({model_class_name})", model_id) for model_id in model_class ]
return all_choices
model_choices = build_model_choices()
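# Each entry is a (label, value) pair for the dropdown, e.g.:
#   ("mistralai/Mistral-Nemo-Instruct-2407 (mistral-nemo-12b-lc)",
#    "mistralai/Mistral-Nemo-Instruct-2407")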
def initial_model(referer=None):
return "mistralai/Mistral-Nemo-Instruct-2407"
# let's use a random but different model each day.
# key=os.environ.get('RANDOM_SEED', 'kcOtfNHA+e')
# o = random.Random(f"{key}-{datetime.date.today().strftime('%Y-%m-%d')}")
# return o.choice(model_choices)[1]
title_text = "Klimbr token input pre-processor demo space"
klimbr_url = "https://github.com/av/klmbr"
css = """
.logo-mark { fill: #ffe184; }
/* from https://github.com/gradio-app/gradio/issues/4001
* necessary as putting ChatInterface in gr.Blocks changes behaviour
*/
.contain { display: flex; flex-direction: column; }
.gradio-container { height: 100vh !important; }
#component-0 { height: 100%; }
#chatbot { flex-grow: 1; overflow: auto;}
.lead-text {
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
padding: 20px;
box-sizing: border-box;
}
.content {
    max-width: 60vw;
text-align: center;
font-size: 15pt;
}
h1 {
margin-bottom: 20px;
}
"""
with gr.Blocks(title=title_text, css=css) as demo:
gr.HTML(f"""
<div class="lead-text">
<h1 align="center"><a href="{klimbr_url}">Klimbr</a> demo space</h1>
<div class="content">
<p>
Klimbr is a technique to increase entropy in LLM outputs
by adding entropy to the input prompt prior to inference.
</p>
<p>
For details on the technique see <a href="{klimbr_url}">the klimbr github</a>
or the source code of this space.
</p>
        </div>
    </div>
    """)
# hidden_state = gr.State(value=initial_model)
    # klimbr_randomize validates its percentage against [0, 100] and divides by
    # 100 internally, so the slider uses the same 0-100 scale.
    percentage = gr.Slider(
        minimum=0,
        maximum=100,
        value=65,
        label="Percentage of input text to randomize"
    )
with gr.Row():
model_selector = gr.Dropdown(
label="Select your Model",
choices=model_choices,
value=initial_model,
# value=hidden_state,
scale=4
)
gr.Button(
value="Visit Model Card ↗️",
scale=1
).click(
inputs=[model_selector],
js="(model_selection) => { window.open(`https://huggingface.co/${model_selection}`, '_blank') }",
fn=None,
)
gr.ChatInterface(
respond,
additional_inputs=[model_selector, percentage],
head=""",
<script>console.log("Hello from gradio!")</script>
""",
concurrency_limit=5
)
gr.HTML(f"""
<p align="center">
Inference by <a href="https://featherless.ai">{logo}</a>
</p>
""")
def update_initial_model_choice(request: gr.Request):
return initial_model(request.headers.get('referer'))
demo.load(update_initial_model_choice, outputs=model_selector)
demo.launch()