import os
import hydra
import aiflows
from aiflows.backends.api_info import ApiInfo
from aiflows.utils.general_helpers import read_yaml_file, quick_load_api_keys
from aiflows import logging
from aiflows.flow_cache import CACHING_PARAMETERS, clear_cache
from aiflows.utils import serving
from aiflows.workers import run_dispatch_worker_thread
from aiflows.messages import FlowMessage
from aiflows.interfaces import KeyInterface
from aiflows.utils.colink_utils import start_colink_server

CACHING_PARAMETERS.do_caching = False  # Set to True to enable caching of flow calls
# clear_cache()  # Uncomment this line to clear the cache

logging.set_verbosity_debug()

# Uncomment the lines below if using the HuggingFace backend.
# litellm.set_verbose=True enables debug logging of the requests and responses, and
# litellm.drop_params=True automatically drops parameters that are not supported by litellm
# (the OpenAI and HuggingFace response formats differ, so this is needed to translate
# HuggingFace responses into the OpenAI format).
# import litellm
# litellm.set_verbose = True
# litellm.drop_params = True

dependencies = [
    {"url": "aiflows/ChatFlowModule", "revision": os.getcwd()},
]

from aiflows import flow_verse

flow_verse.sync_dependencies(dependencies)
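# sync_dependencies fetches each dependency into ./flow_modules (here the local checkout,
# since "revision" points at the current working directory); that is where the class path
# "flow_modules.aiflows.ChatFlowModule.ChatAtomicFlow" used below resolves from.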

if __name__ == "__main__":
    # 1. ~~~~~ Set up a colink server ~~~~
    FLOW_MODULES_PATH = "./"

    cl = start_colink_server()
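    # start_colink_server() starts a local CoLink server and returns a client handle (cl)
    # that the serving utilities and the worker thread below use to talk to it.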

    # 2. ~~~~~ Load flow config ~~~~~~
    root_dir = "."
    cfg_path = os.path.join(root_dir, "demo.yaml")
    cfg = read_yaml_file(cfg_path)
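    # cfg is the flow configuration read from demo.yaml; it is passed as config_overrides
    # when the flow instance is created below.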

    # 2.1 ~~~ Set the API information ~~~
    # OpenAI backend
    api_information = [ApiInfo(backend_used="openai",
                               api_key=os.getenv("OPENAI_API_KEY"))]

    # HuggingFace backend
    # Here api_base is the URL of your self-hosted inference server; this way you can run
    # the flow against a model served on your own machine.
    # api_information = [ApiInfo(backend_used="huggingface",
    #                            api_key=os.getenv("HUGGINGFACE_API_KEY"),
    #                            api_base="http://0.0.0.0:5000/v1/completions")]

    # Azure backend
    # api_information = ApiInfo(backend_used="azure",
    #                           api_base=os.getenv("AZURE_API_BASE"),
    #                           api_key=os.getenv("AZURE_OPENAI_KEY"),
    #                           api_version=os.getenv("AZURE_API_VERSION"))
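    # Only one api_information assignment should be active at a time; keep the backends
    # you are not using commented out.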

    quick_load_api_keys(cfg, api_information, key="api_infos")
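    # quick_load_api_keys fills the "api_infos" entries of the loaded config with
    # api_information, so the served flow picks up the credentials chosen above.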

    # 3. ~~~~ Serve The Flow ~~~~
    serving.serve_flow(
        cl=cl,
        flow_class_name="flow_modules.aiflows.ChatFlowModule.ChatAtomicFlow",
        flow_endpoint="ChatAtomicFlow",
    )
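    # serve_flow registers the ChatAtomicFlow class on the CoLink server under the
    # endpoint name "ChatAtomicFlow"; instances of it can then be requested from that endpoint.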

    # 4. ~~~~~ Start A Worker Thread ~~~~~
    run_dispatch_worker_thread(cl)
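    # The dispatch worker processes requests sent to flows served on this CoLink server;
    # without a running worker, the input message below would never be handled.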

    # 5. ~~~~~ Mount the flow and get its proxy ~~~~~~
    proxy_flow = serving.get_flow_instance(
        cl=cl,
        flow_endpoint="ChatAtomicFlow",
        user_id="local",
        config_overrides=cfg,
    )
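    # get_flow_instance mounts an instance of the served flow and returns a local proxy to it;
    # config_overrides applies demo.yaml (with the injected API keys) on top of the flow's defaults.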

    # 6. ~~~ Get the data ~~~
    data = {"id": 0, "question": "What is the capital of France?"}  # This can be a list of samples
    # data = {"id": 0, "question": "Who was the NBA champion in 2023?"}  # This can be a list of samples

    # Package the data into an input message via the proxy flow
    input_message = proxy_flow.package_input_message(data=data)
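    # package_input_message wraps `data` into an input FlowMessage (imported above)
    # addressed to the mounted flow instance.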

    # 7. ~~~ Run inference ~~~
    future = proxy_flow.get_reply_future(input_message)

    # Uncomment this line if you would like to get the full message back
    # reply_message = future.get_message()
    reply_data = future.get_data()
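    # get_reply_future sends the message and returns a future; get_data() blocks until the
    # reply arrives and returns only its data payload, while get_message() (commented above)
    # returns the full reply message.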

    # ~~~ Print the output ~~~
    print("~~~~~~Reply~~~~~~")
    print(reply_data)

    # 8. ~~~~ (Optional) apply output interface on reply ~~~~
    # output_interface = KeyInterface(
    #     keys_to_rename={"api_output": "answer"},
    # )
    # print("Output: ", output_interface(reply_data))

    # 9. ~~~~~ Optional: Unserve Flow ~~~~~~
    # serving.delete_served_flow(cl, "ChatAtomicFlow")