Spaces:
Running
Running
from api.models import GENERATE_ENGINE | |
from api.utils.request import llama_outer_lock, llama_inner_lock | |
def get_llama_cpp_engine():
    """Yield ``GENERATE_ENGINE`` while holding ``llama_inner_lock``.

    This is a generator function. Lock sequence:

    1. Acquire ``llama_outer_lock``.
    2. Acquire ``llama_inner_lock`` (the outer lock is held while waiting).
    3. Release ``llama_outer_lock``.
    4. Yield ``GENERATE_ENGINE`` to the caller.
    5. Release ``llama_inner_lock`` once the generator is resumed, closed,
       or an exception is thrown into it.

    If step 2 raises, the outer lock is released before the exception
    propagates, so neither lock is left held.

    Yields:
        The module-level ``GENERATE_ENGINE`` object.
    """
    # NOTE: This double lock allows the currently streaming model to
    # check if any other requests are pending in the same thread and cancel
    # the stream if so. A newcomer sits on the outer lock while it waits for
    # the inner one; the code that inspects the outer lock is not in this
    # function.
    llama_outer_lock.acquire()
    # Becomes True once the outer lock has been given back after the inner
    # lock was obtained; until then the outer ``finally`` must release it.
    outer_handed_off = False
    try:
        llama_inner_lock.acquire()
        try:
            # Inner lock is ours: stop advertising a pending request.
            llama_outer_lock.release()
            outer_handed_off = True
            yield GENERATE_ENGINE
        finally:
            llama_inner_lock.release()
    finally:
        if not outer_handed_off:
            llama_outer_lock.release()