# gordonchan's picture
# Upload 41 files
# ca56e6a verified
from api.models import GENERATE_ENGINE
from api.utils.request import llama_outer_lock, llama_inner_lock
def get_llama_cpp_engine():
    """Yield ``GENERATE_ENGINE`` while holding the inner llama lock.

    This is a generator: it acquires ``llama_outer_lock``, then
    ``llama_inner_lock``, releases the outer lock, and yields the engine.
    When the generator is resumed or closed, the inner lock is released.

    Yields:
        The module-level ``GENERATE_ENGINE`` object.
    """
    # NOTE: The two-lock hand-off lets the model that is currently streaming
    # check whether any other requests are pending in the same thread and
    # cancel its stream if so.
    llama_outer_lock.acquire()
    outer_still_held = True
    try:
        # Blocks until the previous holder of the inner lock has finished.
        with llama_inner_lock:
            # The inner lock is ours now, so hand the outer lock back before
            # yielding; it is not held while the engine is in use.
            llama_outer_lock.release()
            outer_still_held = False
            yield GENERATE_ENGINE
    finally:
        # The flag is still set only if something failed before the hand-off
        # above completed; in that case the outer lock must be released here.
        if outer_still_held:
            llama_outer_lock.release()