metrosir
/

sd

Inference Endpoints

Model card Files Files and versions Community

sd / modules /call_queue.py

zhengqilin

add init

1976a91 about 1 year ago

raw history blame contribute delete

No virus

3.5 kB

	import html
	import sys
	import threading
	import traceback
	import time

	from modules import shared, progress

	queue_lock = threading.Lock()


	def wrap_queued_call(func):
	def f(args, *kwargs):
	with queue_lock:
	res = func(args, *kwargs)

	return res

	return f


	def wrap_gradio_gpu_call(func, extra_outputs=None):
	def f(args, *kwargs):

	# if the first argument is a string that says "task(...)", it is treated as a job id
	if len(args) > 0 and type(args[0]) == str and args[0][0:5] == "task(" and args[0][-1] == ")":
	id_task = args[0]
	progress.add_task_to_queue(id_task)
	else:
	id_task = None

	with queue_lock:
	shared.state.begin()
	progress.start_task(id_task)

	try:
	res = func(args, *kwargs)
	finally:
	progress.finish_task(id_task)

	shared.state.end()

	return res

	return wrap_gradio_call(f, extra_outputs=extra_outputs, add_stats=True)


	def wrap_gradio_call(func, extra_outputs=None, add_stats=False):
	def f(args, extra_outputs_array=extra_outputs, *kwargs):
	run_memmon = shared.opts.memmon_poll_rate > 0 and not shared.mem_mon.disabled and add_stats
	if run_memmon:
	shared.mem_mon.monitor()
	t = time.perf_counter()

	try:
	res = list(func(args, *kwargs))
	except Exception as e:
	# When printing out our debug argument list, do not print out more than a MB of text
	max_debug_str_len = 131072 # (1024*1024)/8

	print("Error completing request", file=sys.stderr)
	argStr = f"Arguments: {str(args)} {str(kwargs)}"
	print(argStr[:max_debug_str_len], file=sys.stderr)
	if len(argStr) > max_debug_str_len:
	print(f"(Argument list truncated at {max_debug_str_len}/{len(argStr)} characters)", file=sys.stderr)

	print(traceback.format_exc(), file=sys.stderr)

	shared.state.job = ""
	shared.state.job_count = 0

	if extra_outputs_array is None:
	extra_outputs_array = [None, '']

	res = extra_outputs_array + [f"<div class='error'>{html.escape(type(e).__name__+': '+str(e))}</div>"]

	shared.state.skipped = False
	shared.state.interrupted = False
	shared.state.job_count = 0

	if not add_stats:
	return tuple(res)

	elapsed = time.perf_counter() - t
	elapsed_m = int(elapsed // 60)
	elapsed_s = elapsed % 60
	elapsed_text = f"{elapsed_s:.2f}s"
	if elapsed_m > 0:
	elapsed_text = f"{elapsed_m}m "+elapsed_text

	if run_memmon:
	mem_stats = {k: -(v//-(1024*1024)) for k, v in shared.mem_mon.stop().items()}
	active_peak = mem_stats['active_peak']
	reserved_peak = mem_stats['reserved_peak']
	sys_peak = mem_stats['system_peak']
	sys_total = mem_stats['total']
	sys_pct = round(sys_peak/max(sys_total, 1) * 100, 2)

	vram_html = f"<p class='vram'>Torch active/reserved: {active_peak}/{reserved_peak} MiB, <wbr>Sys VRAM: {sys_peak}/{sys_total} MiB ({sys_pct}%)</p>"
	else:
	vram_html = ''

	# last item is always HTML
	res[-1] += f"<div class='performance'><p class='time'>Time taken: <wbr>{elapsed_text}</p>{vram_html}</div>"

	return tuple(res)

	return f