import os
import urllib.request

from fastapi import FastAPI
from llama_cpp import Llama

app = FastAPI(docs_url="/")

def download_file(file_link, filename):
    # Download the model file only if it is not already present on disk.
    if not os.path.isfile(filename):
        urllib.request.urlretrieve(file_link, filename)
        print("File downloaded successfully.")
    else:
        print("File already exists.")

ggml_model_path = "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q4_0.gguf"
filename = "zephyr-7b-beta.Q4_0.gguf"

# Fetch the quantized model once at startup, then load it from the local file.
download_file(ggml_model_path, filename)

llm = Llama(
    model_path=filename,
    n_ctx=512,    # context window size, in tokens
    n_batch=126,  # prompt tokens processed per batch
)
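# Optional GPU offload (an assumption, not part of the original setup): builds
# of llama-cpp-python compiled with GPU support accept an n_gpu_layers
# argument, where -1 offloads all layers, e.g.:
#
#   llm = Llama(model_path=filename, n_ctx=512, n_batch=126, n_gpu_layers=-1)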
def generate_text(
    prompt="What color is an apple?",
    max_tokens=256,
    temperature=0.7,
    top_p=0.5,
    echo=False,
    stop=["#"],
):
    # Run a raw completion against the loaded model and return only the
    # generated text of the first choice.
    output = llm(
        prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        echo=echo,  # if True, the prompt is echoed back in the output
        stop=stop,  # generation halts when any of these strings appears
    )
    output_text = output["choices"][0]["text"]
    return output_text
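# For instance (a sketch, reusing the model loaded above), a plain completion
# without any chat template:
#
#   generate_text("Q: What is 2 + 2?\nA:", max_tokens=16, stop=["\n"])
#
# returns only the model's continuation, since echo defaults to False.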
def generate_prompt_from_template(input):
    # Wrap the user's message in the ChatML-style template, closing with the
    # assistant header so the model answers as the assistant.
    chat_prompt_template = f"""<|im_start|>system
You are a helpful chatbot.<|im_end|>
<|im_start|>user
{input}<|im_end|>
<|im_start|>assistant"""
    return chat_prompt_template
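# For example, generate_prompt_from_template("Hi") produces:
#
#   <|im_start|>system
#   You are a helpful chatbot.<|im_end|>
#   <|im_start|>user
#   Hi<|im_end|>
#   <|im_start|>assistant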
@app.get("/generate")
def generate(text: str):
    prompt = generate_prompt_from_template(text)
    output_text = generate_text(
        prompt,
        max_tokens=356,
    )
    return output_text
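# To try this out (a sketch; assumes the file is saved as main.py and that
# fastapi, uvicorn, and llama-cpp-python are installed):
#
#   uvicorn main:app --reload
#
# then query the endpoint, e.g.:
#
#   curl "http://127.0.0.1:8000/generate?text=Hello"
#
# The interactive Swagger UI is served at the root path because the app was
# created with docs_url="/".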