# llama-index >=0.10 modular package layout (requires the
# llama-index-llms-llama-cpp and llama-index-embeddings-huggingface packages);
# the original mixed llama_index.legacy and modular import paths.
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.llms.llama_cpp.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)
# Instantiate the llama.cpp-backed LLM. Note: the GGML format referenced here
# is deprecated; recent llama-cpp-python releases only load GGUF models, so
# model_url may need to point at the GGUF variant of this checkpoint.
llm = LlamaCPP(
    model_url=(
        "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML"
        "/resolve/main/llama-2-13b-chat.ggmlv3.q4_0.bin"
    ),
    temperature=0.1,
    max_new_tokens=256,
    context_window=3900,  # leave headroom below Llama 2's 4096-token limit
    generate_kwargs={},
    model_kwargs={"n_gpu_layers": 1},  # offload one layer to GPU; raise if VRAM allows
    # Format chat messages / raw completions into Llama 2's prompt template
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)
def get_embed_model():
    """Load the BGE small English embedding model from Hugging Face."""
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
    return embed_model
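# Usage sketch (illustrative, not part of the original Space): embedding a
# query with the model above. HuggingFaceEmbedding.get_text_embedding returns
# a plain Python list of floats; bge-small-en-v1.5 yields 384-dim vectors.
#
#   embed_model = get_embed_model()
#   vector = embed_model.get_text_embedding("What is retrieval-augmented generation?")
#   assert len(vector) == 384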
async def get_answer(query, context):
    """Answer a question from the given context using the local LLM."""
    prompt = f"""Given the context below, answer the question.
    Context: {context}
    Question: {query}
    Answer:
    """
    return await llm.acomplete(prompt=prompt)
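# Demo entry point (an illustrative sketch, not part of the original Space):
# wires the pieces together with a hard-coded context string. A real app would
# first retrieve context passages, e.g. by ranking document chunks against the
# query using embeddings from get_embed_model().
if __name__ == "__main__":
    import asyncio

    context = "LlamaCPP runs GGML/GGUF models locally via llama.cpp bindings."
    response = asyncio.run(get_answer("What does LlamaCPP do?", context))
    print(response.text)  # CompletionResponse exposes the generated text via .text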