import os

import requests
from llama_cpp import Llama

# Quantized Mistral 7B Instruct v0.3 (Q4_K_M) in GGUF format, hosted on Hugging Face.
model_url = "https://huggingface.co/matrixportal/Mistral-7B-Instruct-v0.3-Q4_K_M-GGUF/resolve/main/Mistral-7B-Instruct-v0.3-Q4_K_M.gguf"
model_path = "/home/user/app/mistral.gguf"

# Download the model once; skip the download if the file is already on disk.
if not os.path.exists(model_path):
    print("Downloading model...")
    response = requests.get(model_url, stream=True)
    response.raise_for_status()
    with open(model_path, "wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)
    print("Model downloaded successfully.")

# Load the model with a 2048-token context window on 8 CPU threads.
llm = Llama(model_path=model_path, n_ctx=2048, n_threads=8)


def chat_response(prompt):
    # Run a single completion and return only the generated text.
    response = llm(prompt, max_tokens=200, temperature=0.7)
    return response["choices"][0]["text"]


print(chat_response("Hello!"))
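
# Optional sketch: Mistral Instruct models are trained on an [INST] ... [/INST] chat
# format, which the raw-prompt call above does not apply. llama-cpp-python's
# create_chat_completion can apply the chat template embedded in the GGUF metadata
# (when present). The helper name chat_response_templated is illustrative, not part
# of the original script.
def chat_response_templated(user_message):
    result = llm.create_chat_completion(
        messages=[{"role": "user", "content": user_message}],
        max_tokens=200,
        temperature=0.7,
    )
    return result["choices"][0]["message"]["content"]


print(chat_response_templated("Hello!"))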