Spaces:

sasan
/

KITT

Build error

App Files Files Community

KITT / space.py

sasan

chore: Update requirements.txt and add MeloTTS dependency

dca05b7 5 months ago

raw

history blame contribute delete

17.2 kB

	import os
	import subprocess
	import sys

	# Install flash-attn when the app starts. The extra variable is ADDED to the
	# inherited environment: passing a bare dict as ``env`` would replace the whole
	# environment (PATH, HOME, ...) for the child process.
	# Running through ``sys.executable -m`` targets the interpreter executing this
	# file, and the list form avoids going through a shell.
	subprocess.run(
	    [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
	    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
	    check=False,  # best effort, as before: a failed install does not stop start-up
	)
	# Run unidic's "download" command with the same interpreter.
	subprocess.run(
	    [sys.executable, "-m", "unidic", "download"],
	    check=False,
	)

	import gradio as gr
	import spaces
	from langchain.tools import tool
	from langchain_community.chat_message_histories import ChatMessageHistory
	from langchain_core.utils.function_calling import convert_to_openai_tool
	from loguru import logger

	from kitt.core import tts_gradio
	from kitt.core import utils as kitt_utils
	from kitt.core import voice_options
	from kitt.core.model import generate_function_call as process_query
	from kitt.core.stt import transcribe_audio
	from kitt.core.tts import prep_for_tts, run_melo_tts, run_tts_replicate
	from kitt.skills import (
	code_interpreter,
	date_time_info,
	do_anything_else,
	extract_func_args,
	find_route,
	get_forecast,
	get_weather,
	get_weather_current_location,
	search_along_route_w_coordinates,
	search_points_of_interest,
	set_vehicle_destination,
	set_vehicle_speed,
	)
	from kitt.skills.common import config, vehicle
	from kitt.skills.routing import calculate_route, find_address

	# Default trip endpoints used for the initial route and the text boxes.
	ORIGIN = "Luxembourg, Luxembourg"
	DESTINATION = "Paris, France"

	# Default settings; they seed both ``global_context`` and the session state.
	DEFAULT_LLM_BACKEND = "local"
	ENABLE_HISTORY = True
	ENABLE_TTS = True
	TTS_BACKEND = "local"
	USER_PREFERENCES = "User prefers italian food."

	# Process-wide mutable context shared by the callbacks in this module.
	global_context = dict(
	    vehicle=vehicle,
	    query="How is the weather?",
	    route_points=[],
	    origin=ORIGIN,
	    destination=DESTINATION,
	    enable_history=ENABLE_HISTORY,
	    tts_enabled=ENABLE_TTS,
	    tts_backend=TTS_BACKEND,
	    llm_backend=DEFAULT_LLM_BACKEND,
	    map_origin=ORIGIN,
	    map_destination=DESTINATION,
	    update_proxy=0,
	    map=None,
	)

	# Cache handed to ``tts_gradio`` on every call, and the shared chat history.
	speaker_embedding_cache = dict()
	history = ChatMessageHistory()


	# One entry per full hour, "00:00:00" through "23:00:00".
	hour_options = [f"{hour:02d}:00:00" for hour in range(24)]


	@tool
	def search_along_route(query=""):
	    """Search for points of interest along the route/way to the destination.

	    Args:
	        query (str, optional): The type of point of interest to search for. Defaults to "restaurant".

	    """
	    # NOTE(review): the docstring above advertises a default of "restaurant",
	    # but the signature default is the empty string. The text is kept verbatim
	    # because the @tool decorator receives it -- confirm which one is intended.
	    # The route is whatever was last stored under global_context["route_points"].
	    return search_along_route_w_coordinates(global_context["route_points"], query)


	def set_time(time_picker):
	    """Store the selected time on the shared vehicle and return its status.

	    Args:
	        time_picker: The value chosen in the time dropdown.

	    Returns:
	        ``vehicle.model_dump()``, the same representation that ``run_model`` and
	        ``calculate_route_gradio`` send to the vehicle-status output. The raw
	        ``vehicle`` object was returned before.
	    """
	    vehicle.time = time_picker
	    return vehicle.model_dump()


	# Skills passed to the model as callable functions.
	functions = [
	    # set_vehicle_speed,
	    set_vehicle_destination,
	    get_weather,
	    find_route,
	    search_points_of_interest,
	    search_along_route,
	]
	# The same skills, each converted with ``convert_to_openai_tool``.
	openai_tools = list(map(convert_to_openai_tool, functions))


	def clear_history():
	    """Log the request, then empty the module-level conversation history."""
	    notice = "Clearing the conversation history..."
	    logger.info(notice)
	    history.clear()


	@spaces.GPU
	def run_llama3_model(query, voice_character, state):
	    """Answer a query with the LLM and, if TTS is enabled, synthesise the answer.

	    Args:
	        query: The user's question as text.
	        voice_character: The selected voice. A value containing "Fast" selects
	            ``run_melo_tts``.
	        state: The session-state dict. ``user_preferences`` and ``llm_backend``
	            are read from it, as are ``tts_enabled`` and ``tts_backend`` when
	            present.

	    Returns:
	        A ``(output_text, voice_out)`` tuple. ``voice_out`` is ``None`` when TTS
	        is disabled.

	    Raises:
	        AssertionError: If ``functions`` or ``openai_tools`` is empty.
	    """
	    assert len(functions) > 0, "No functions to call"
	    assert len(openai_tools) > 0, "No openai tools to call"

	    output_text = process_query(
	        query,
	        history=history,
	        user_preferences=state["user_preferences"],
	        tools=openai_tools,
	        functions=functions,
	        backend=state["llm_backend"],
	    )
	    gr.Info(f"Output text: {output_text}\nGenerating voice output...")
	    output_text_tts = prep_for_tts(output_text)

	    # Prefer the per-session settings passed in with the call, as is already done
	    # for the LLM backend. The process-wide context is only a fallback for state
	    # dicts that lack these keys.
	    tts_enabled = state.get("tts_enabled", global_context["tts_enabled"])
	    tts_backend = state.get("tts_backend", global_context["tts_backend"])

	    voice_out = None
	    if tts_enabled:
	        if "Fast" in voice_character:
	            voice_out = run_melo_tts(output_text_tts, voice_character)
	        elif tts_backend == "replicate":
	            voice_out = run_tts_replicate(output_text_tts, voice_character)
	        else:
	            # Only the first element of tts_gradio's result is used.
	            voice_out = tts_gradio(
	                output_text_tts, voice_character, speaker_embedding_cache
	            )[0]
	    return output_text, voice_out


	def run_model(query, voice_character, state):
	    """Process one text query and return the values for the UI outputs.

	    The query is stripped and has its single quotes removed, is stored in
	    ``global_context["query"]``, and is answered by ``run_llama3_model``. The
	    history is cleared when the session has history disabled, and the
	    ``update_proxy`` counter is incremented on every call.

	    Returns:
	        ``(text, voice, vehicle status dict, state, {"update_proxy": counter})``.
	    """
	    model_name = state.get("model", "llama3")
	    query = query.strip().replace("'", "")
	    logger.info(
	        f"Running model: {model_name} with query: {query}, voice_character: {voice_character} and llm_backend: {state['llm_backend']}, tts_enabled: {state['tts_enabled']}"
	    )
	    global_context["query"] = query
	    answer_text, answer_audio = run_llama3_model(query, voice_character, state)

	    keep_history = state["enable_history"]
	    if not keep_history:
	        history.clear()
	    global_context["update_proxy"] += 1

	    proxy_payload = {"update_proxy": global_context["update_proxy"]}
	    return answer_text, answer_audio, vehicle.model_dump(), state, proxy_payload


	def calculate_route_gradio(origin, destination):
	    """Compute a route, move the vehicle to its first point and plot it.

	    Args:
	        origin: Start of the route, passed to ``calculate_route``.
	        destination: End of the route, passed to ``calculate_route``.

	    Returns:
	        ``(plot, vehicle.model_dump(), 0)``. The handlers that use this function
	        send the trailing 0 to the trip-progress slider.

	    Raises:
	        IndexError: If ``calculate_route`` yields no points.
	    """
	    _, points = calculate_route(origin, destination)
	    # Move the vehicle to the route start BEFORE plotting. The plot was
	    # previously drawn with the vehicle's old coordinates, so the marker
	    # disagreed with the status and the reset progress returned below.
	    start = points[0]
	    vehicle.location_coordinates = start["latitude"], start["longitude"]
	    plot = kitt_utils.plot_route(points, vehicle=vehicle.location_coordinates)
	    global_context["map"] = plot
	    global_context["route_points"] = points
	    return plot, vehicle.model_dump(), 0


	def update_vehicle_status(trip_progress, origin, destination, state):
	    """Move the vehicle along the stored route according to the trip progress.

	    Args:
	        trip_progress: Progress in percent (the slider runs from 0 to 100).
	        origin: Used to compute a route only when none is stored yet.
	        destination: Same as ``origin``; also stored in ``global_context`` then.
	        state: The session-state dict, returned unchanged.

	    Returns:
	        ``(vehicle status dict, route plot, state)``. The status is
	        ``vehicle.model_dump()``, the representation the other handlers send to
	        the vehicle-status output.
	    """
	    if not global_context["route_points"]:
	        _, points = calculate_route(origin, destination)
	        global_context["route_points"] = points
	        global_context["destination"] = destination

	    route_points = global_context["route_points"]
	    n_points = len(route_points)
	    if n_points == 0:
	        # With no points there is nothing to index. The old code raised
	        # IndexError here; keep the current status and map instead.
	        logger.warning("No route points available; vehicle status left unchanged")
	        return vehicle.model_dump(), global_context["map"], state

	    # Map 0..100 % onto a point index, clamped to the last point.
	    index = min(int(trip_progress / 100 * n_points), n_points - 1)
	    logger.info(f"Trip progress: {trip_progress} len: {n_points}, index: {index}")
	    new_coords = route_points[index]
	    new_coords = new_coords["latitude"], new_coords["longitude"]
	    logger.info(
	        f"Trip progress: {trip_progress}, len: {n_points}, new_coords: {new_coords}"
	    )
	    vehicle.location_coordinates = new_coords
	    vehicle.location = find_address(new_coords[0], new_coords[1])
	    plot = kitt_utils.plot_route(route_points, vehicle=vehicle.location_coordinates)
	    return vehicle.model_dump(), plot, state


	@spaces.GPU
	def save_and_transcribe_run_model(audio, voice_character, state):
	    """Transcribe recorded audio and run the transcript through ``run_model``.

	    Returns:
	        ``(None, transcript, output text, output voice, vehicle status, state,
	        update proxy)``. The last five values come straight from ``run_model``.
	    """
	    transcript = transcribe_audio(audio)
	    reply_text, reply_voice, status, state, proxy = run_model(
	        transcript, voice_character, state
	    )
	    # The leading None goes to the first output of the handler.
	    return None, transcript, reply_text, reply_voice, status, state, proxy


	def set_tts_enabled(tts_enabled, state):
	    """Store the "Enable TTS" choice in the session state and the global context.

	    Args:
	        tts_enabled: The radio value; only the exact string "Yes" enables TTS.
	        state: The session-state dict; its "tts_enabled" entry is overwritten.

	    Returns:
	        The same ``state`` dict after the update.
	    """
	    enabled = tts_enabled == "Yes"
	    logger.info(f"TTS enabled was {state['tts_enabled']} and changed to {enabled}")
	    for settings in (state, global_context):
	        settings["tts_enabled"] = enabled
	    return state


	def set_llm_backend(llm_backend, state):
	    """Store the chosen LLM backend, lower-cased, in both settings dicts.

	    Args:
	        llm_backend: One of "Ollama", "Replicate" or "Local".
	        state: The session-state dict; its "llm_backend" entry is overwritten.

	    Returns:
	        The same ``state`` dict after the update.

	    Raises:
	        AssertionError: If ``llm_backend`` is not one of the accepted names.
	    """
	    assert llm_backend in ("Ollama", "Replicate", "Local"), "Invalid LLM backend"
	    backend = llm_backend.lower()
	    logger.info(f"LLM backend was {state['llm_backend']} and changed to {backend}")
	    for settings in (state, global_context):
	        settings["llm_backend"] = backend
	    return state


	def set_user_preferences(preferences, state):
	    """Store the user-preferences text in the session state and global context.

	    Args:
	        preferences: The preferences text, stored as given.
	        state: The session-state dict; "user_preferences" is overwritten.

	    Returns:
	        The same ``state`` dict after the update.
	    """
	    logger.info(f"User preferences changed to: {preferences}")
	    for settings in (state, global_context):
	        settings["user_preferences"] = preferences
	    return state


	def set_enable_history(enable_history, state):
	    """Store the history on/off choice in the session state and global context.

	    Args:
	        enable_history: The radio value; only the exact string "Yes" enables it.
	        state: The session-state dict; "enable_history" is overwritten.

	    Returns:
	        The same ``state`` dict after the update.
	    """
	    keep_history = enable_history == "Yes"
	    logger.info(
	        f"Enable history was {state['enable_history']} and changed to {keep_history}"
	    )
	    for settings in (state, global_context):
	        settings["enable_history"] = keep_history
	    return state


	def set_tts_backend(tts_backend, state):
	    """Store the chosen TTS backend, lower-cased, in both settings dicts.

	    Args:
	        tts_backend: The radio value; it is lower-cased before being stored.
	        state: The session-state dict; its "tts_backend" entry is overwritten.

	    Returns:
	        The same ``state`` dict after the update.
	    """
	    backend = tts_backend.lower()
	    logger.info(f"TTS backend was {state['tts_backend']} and changed to {backend}")
	    for settings in (state, global_context):
	        settings["tts_backend"] = backend
	    return state


	def conditional_update():
	    """Sync the global context with the vehicle and redraw the map if it is stale.

	    The map counts as stale when the vehicle's location or destination differs
	    from the pair the map was last drawn for, or when ``update_proxy`` is
	    still 0.

	    Returns:
	        The plot stored in ``global_context["map"]``, freshly computed or cached.
	    """
	    origin, destination = vehicle.location, vehicle.destination
	    global_context["destination"] = destination
	    global_context["origin"] = origin

	    map_is_stale = (
	        global_context["map_origin"] != origin
	        or global_context["map_destination"] != destination
	        or global_context["update_proxy"] == 0
	    )
	    if map_is_stale:
	        logger.info("Updating the map plot... in conditional_update")
	        map_plot, _, _ = calculate_route_gradio(origin, destination)
	        global_context["map"] = map_plot
	        # Remember what the map was drawn for. These two keys were compared but
	        # never updated, so once the vehicle left the initial origin or
	        # destination the route was recomputed on every call.
	        global_context["map_origin"] = origin
	        global_context["map_destination"] = destination
	    return global_context["map"]


	# To use the microphone in Chrome when the app is served over plain HTTP, open chrome://flags/#unsafely-treat-insecure-origin-as-secure, enter http://10.186.115.21:7860/
	# under "Insecure origins treated as secure", enable the flag, and relaunch Chrome.

	# example question:
	# what's the weather like outside?
	# What's the closest restaurant from here?


	def create_demo(tts_server: bool = False, model="llama3"):
	"""Build the KITT Gradio Blocks UI and register its event handlers.

	Args:
	tts_server: Only echoed in the start-up print within this function.
	model: Stored under "model" in the initial session state.

	Returns:
	The ``gr.Blocks`` instance holding the layout and the handlers.
	"""
	print(f"Running the demo with model: {model} and TTSServer: {tts_server}")
	with gr.Blocks(theme=gr.themes.Default(), title="KITT") as demo:
	# Initial session state, seeded from the module-level defaults.
	state = gr.State(
	value={
	# "context": initial_context,
	"query": "",
	"route_points": [],
	"model": model,
	"tts_enabled": ENABLE_TTS,
	"llm_backend": DEFAULT_LLM_BACKEND,
	"user_preferences": USER_PREFERENCES,
	"enable_history": ENABLE_HISTORY,
	"tts_backend": TTS_BACKEND,
	"destination": DESTINATION,
	}
	)

	# Compute the default route once at build time; this also fills
	# global_context["route_points"] via calculate_route_gradio.
	plot, _, _ = calculate_route_gradio(ORIGIN, DESTINATION)
	global_context["map"] = plot

	with gr.Row():
	# with gr.Row():
	# gr.Text("KITT", interactive=False)
	with gr.Column(scale=1, min_width=300):
	vehicle_status = gr.JSON(
	value=vehicle.model_dump(), label="Vehicle status"
	)
	# NOTE(review): the label says HH:MM while the choices in hour_options
	# have the form HH:00:00 -- confirm the label.
	time_picker = gr.Dropdown(
	choices=hour_options,
	label="What time is it? (HH:MM)",
	value="08:00:00",
	interactive=True,
	)
	voice_character = gr.Radio(
	choices=voice_options,
	label="Choose a voice",
	value=voice_options[0],
	show_label=True,
	)
	# voice_character = gr.Textbox(
	# label="Choose a voice",
	# value="freeman",
	# show_label=True,
	# )
	origin = gr.Textbox(
	value=ORIGIN,
	label="Origin",
	interactive=True,
	)
	destination = gr.Textbox(
	value=DESTINATION,
	label="Destination",
	interactive=True,
	)
	preferences = gr.Textbox(
	value=USER_PREFERENCES,
	label="User preferences",
	lines=3,
	interactive=True,
	)

	with gr.Column(scale=2, min_width=600):
	map_plot = gr.Plot(value=plot, label="Map")
	trip_progress = gr.Slider(
	0, 100, step=5, label="Trip progress", interactive=True
	)

	# with gr.Column(scale=1, min_width=300):
	# gr.Image("linkedin-1.png", label="Linkedin - Sasan Jafarnejad")
	# gr.Image(
	# "team-ubix.png",
	# label="Research Team - UBIX - University of Luxembourg",
	# )

	with gr.Row():
	with gr.Column():
	input_audio = gr.Audio(
	type="numpy",
	sources=["microphone"],
	label="Input audio",
	elem_id="input_audio",
	)
	input_text = gr.Textbox(
	value="How is the weather?", label="Input text", interactive=True
	)
	with gr.Accordion("Debug"):
	# NOTE(review): this component reuses elem_id "input_audio" from the
	# main audio input above -- confirm that the duplicate id is intended.
	input_audio_debug = gr.Audio(
	type="numpy",
	sources=["microphone"],
	label="Input audio",
	elem_id="input_audio",
	)
	input_text_debug = gr.Textbox(
	value="How is the weather?",
	label="Input text",
	interactive=True,
	)
	# Receives {"update_proxy": n} from run_model; its change event below
	# triggers conditional_update.
	update_proxy = gr.JSON(
	value=dict(update_proxy=0),
	label="Global context",
	)
	with gr.Accordion("Config"):
	tts_enabled = gr.Radio(
	["Yes", "No"],
	label="Enable TTS",
	value="Yes" if ENABLE_TTS else "No",
	interactive=True,
	)
	# "Local" is the only TTS backend offered here.
	tts_backend = gr.Radio(
	["Local"],
	label="TTS Backend",
	value=TTS_BACKEND.title(),
	interactive=True,
	)
	llm_backend = gr.Radio(
	choices=["Ollama", "Local"],
	label="LLM Backend",
	value=DEFAULT_LLM_BACKEND.title(),
	interactive=True,
	)
	enable_history = gr.Radio(
	["Yes", "No"],
	label="Maintain the conversation history?",
	value="Yes" if ENABLE_HISTORY else "No",
	interactive=True,
	)
	# Push button
	clear_history_btn = gr.Button(value="Clear History")
	with gr.Column():
	output_audio = gr.Audio(label="output audio", autoplay=True)
	output_text = gr.TextArea(
	value="", label="Output text", interactive=False
	)

	# Update plot based on the origin and destination
	# Sets the current location and destination
	origin.submit(
	fn=calculate_route_gradio,
	inputs=[origin, destination],
	outputs=[map_plot, vehicle_status, trip_progress],
	)
	destination.submit(
	fn=calculate_route_gradio,
	inputs=[origin, destination],
	outputs=[map_plot, vehicle_status, trip_progress],
	)
	preferences.submit(
	fn=set_user_preferences, inputs=[preferences, state], outputs=[state]
	)

	# Update time based on the time picker
	time_picker.select(fn=set_time, inputs=[time_picker], outputs=[vehicle_status])

	# Run the model when the input text is submitted
	input_text.submit(
	fn=run_model,
	inputs=[input_text, voice_character, state],
	outputs=[output_text, output_audio, vehicle_status, state, update_proxy],
	)
	input_text_debug.submit(
	fn=run_model,
	inputs=[input_text_debug, voice_character, state],
	outputs=[output_text, output_audio, vehicle_status, state, update_proxy],
	)

	# Set the vehicle status based on the trip progress
	trip_progress.release(
	fn=update_vehicle_status,
	inputs=[trip_progress, origin, destination, state],
	outputs=[vehicle_status, map_plot, state],
	)

	# Save and transcribe the audio
	# The first output is the audio input itself; the handler returns None for it.
	input_audio.stop_recording(
	fn=save_and_transcribe_run_model,
	inputs=[input_audio, voice_character, state],
	outputs=[
	input_audio,
	input_text,
	output_text,
	output_audio,
	vehicle_status,
	state,
	update_proxy,
	],
	)
	# The debug recorder only transcribes into the debug text box.
	input_audio_debug.stop_recording(
	fn=transcribe_audio,
	inputs=[input_audio_debug],
	outputs=[input_text_debug],
	)

	# Clear the history
	clear_history_btn.click(fn=clear_history, inputs=[], outputs=[])

	# Config
	tts_enabled.change(
	fn=set_tts_enabled, inputs=[tts_enabled, state], outputs=[state]
	)
	tts_backend.change(
	fn=set_tts_backend, inputs=[tts_backend, state], outputs=[state]
	)
	llm_backend.change(
	fn=set_llm_backend, inputs=[llm_backend, state], outputs=[state]
	)
	enable_history.change(
	fn=set_enable_history, inputs=[enable_history, state], outputs=[state]
	)
	# Refresh the map whenever the update_proxy JSON changes.
	update_proxy.change(fn=conditional_update, inputs=[], outputs=[map_plot])

	return demo


	# Close any Gradio interfaces that are still open so the port is available.
	gr.close_all()


	# Build the UI with the default arguments spelled out, then serve it on all
	# network interfaces at port 7860.
	demo = create_demo(tts_server=False, model="llama3")
	demo.launch(
	    share=False,
	    ssl_verify=False,
	    server_port=7860,
	    server_name="0.0.0.0",
	    debug=True,
	)