# Spaces: Runtime error
from ctransformers import AutoModelForCausalLM

# Earlier torch-based device selection, kept for reference:
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# NOTE: `device` is not passed to the loader below; GPU usage is governed by
# `gpu_layers` instead.
device = 'cpu'

# Set gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU
# acceleration is available on your system.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Llama-2-7b-Chat-GGUF",
    model_file="llama-2-7b-chat.Q4_K_M.gguf",
    model_type="llama",
    gpu_layers=0,
)
def generate_answer(query: str, sample_num: int) -> list:
    """Collect ``sample_num`` completions of ``query`` from the module-level model.

    Args:
        query: Prompt passed unchanged to the module-level ``llm`` callable.
        sample_num: Number of times to call the model. Zero or a negative
            value results in no calls and an empty list.

    Returns:
        A list holding one ``llm(query)`` result per call, in call order.
    """
    # One model call per requested sample; the prompt is identical each time.
    return [llm(query) for _ in range(sample_num)]