from application.llm.base import BaseLLM
from application.core.settings import settings


class LlamaCpp(BaseLLM):
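    """LLM backend that runs a local model through llama-cpp-python.

    Prompts are built in an instruction/context/answer format: the first
    message is treated as retrieved context and the last message as the
    user question.
    """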

    def __init__(self, api_key, llm_name=settings.MODEL_PATH, **kwargs):
        # llama_cpp is an optional dependency; import it lazily so the rest
        # of the application can load without it.
        try:
            from llama_cpp import Llama
        except ImportError:
            raise ImportError(
                "Please install llama_cpp using pip install llama-cpp-python"
            )

        # api_key is not used by the local llama.cpp backend.
        # Load the local model once per instance; n_ctx is the context window.
        self.llama = Llama(model_path=llm_name, n_ctx=2048)

    def gen(self, model, engine, messages, stream=False, **kwargs):
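        """Run a blocking completion and return only the generated answer text."""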
        context = messages[0]['content']
        user_question = messages[-1]['content']
        prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"

        result = self.llama(prompt, max_tokens=150, echo=False)

        # Strip the prompt scaffolding so only the text after "### Answer" is returned.
        return result['choices'][0]['text'].split('### Answer \n')[-1]

    def gen_stream(self, model, engine, messages, stream=True, **kwargs):
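        """Stream the completion, yielding generated text chunks as they arrive."""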
        context = messages[0]['content']
        user_question = messages[-1]['content']
        prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"

        result = self.llama(prompt, max_tokens=150, echo=False, stream=stream)

        # With stream=True, llama_cpp returns an iterator of partial completions.
        for item in result:
            for choice in item['choices']:
                yield choice['text']
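
# Usage sketch (assumption, not part of the original module): how this backend
# might be exercised directly. The model path and message contents below are
# placeholders; gen() expects the retrieved context as the first message and
# the user question as the last one.
#
#   llm = LlamaCpp(api_key=None, llm_name="/path/to/model.gguf")
#   messages = [
#       {"content": "Retrieved documentation snippet."},
#       {"content": "How do I configure the model path?"},
#   ]
#   print(llm.gen(model=None, engine=None, messages=messages))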