Spaces:
Sleeping
Sleeping
File size: 1,597 Bytes
434144a c355718 434144a c355718 469f650 c355718 469f650 434144a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
from os import environ as env
from os import system as run
from subprocess import check_output
import gradio as gr
def inference_binary_check():
    """Make sure the installed llama-cpp-python build matches the hardware.

    Without a GPU, llama-cpp-python has to be re-installed to avoid an error.
    An NVIDIA GPU is probed for with a shell pipeline; if that probe raises
    for any reason (for example a non-zero exit status), the machine is
    treated as CPU-only and the package is uninstalled and re-installed
    with pip.

    Note: the "NVIDIA"/"ID" substring test only decides whether a
    confirmation line is printed. The CPU fallback is triggered solely by
    the probe raising an exception.
    """
    probe = "nvidia-debugdump --list|grep Device"
    try:
        listing = str(check_output(probe, shell=True).decode())
        if "NVIDIA" in listing and "ID" in listing:
            print("NVIDIA GPU detected.")
    except Exception as e:
        # Best-effort detection: any failure of the probe means "no GPU".
        print("No NVIDIA GPU detected, using CPU. GPU check result:", e)
        gpu_available = False
    else:
        gpu_available = True

    if gpu_available:
        print("GPU detected, existing GPU focused llama-cpp-python should work.")
        return

    print("Avoiding error by re-installing non-GPU llama-cpp-python build because no GPU was detected.")
    reinstall_steps = (
        'pip uninstall llama-cpp-python -y',
        'pip install git+https://github.com/lukestanley/llama-cpp-python.git@expose_json_grammar_convert_function --upgrade --no-cache-dir --force-reinstall',
    )
    for shell_command in reinstall_steps:
        run(shell_command)
    print("llama-cpp-python re-installed, will now attempt to load.")
# Worker type is read from the environment; "runpod" is used when it is unset.
LLM_WORKER = env.get("LLM_WORKER", "runpod")

# Only the "http" and "in_memory" workers trigger the llama-cpp-python check.
if LLM_WORKER in ("http", "in_memory"):
    inference_binary_check()

# Deliberately imported after the check above, so that chill can import
# llama-cpp-python without an error:
from chill import improvement_loop
def chill_out(text):
    """Log the incoming text and return improvement_loop's result as a string.

    Used as the callback for the Gradio interface.
    """
    print("Got this input:", text)
    improved = improvement_loop(text)
    return str(improved)
# Wrap chill_out in a plain text-in / text-out Gradio interface.
demo = gr.Interface(
    fn=chill_out,
    inputs="text",
    outputs="text",
)

# Start the web UI, capped at one thread and with sharing requested.
demo.launch(share=True, max_threads=1)