import os
import logging

import gradio as gr
import requests
from llama_cpp import Llama

# Make the INFO-level download messages below visible on the console.
logging.basicConfig(level=logging.INFO)

# Quantised Q4_K_M GGUF build of Foundation-Sec-8B hosted on Hugging Face.
MODEL_URL = (
    "https://huggingface.co/fdtn-ai/Foundation-Sec-8B-Q4_K_M-GGUF/"
    "resolve/main/foundation-sec-8b-q4_k_m.gguf"
)

# Cache the downloaded weights under /tmp.
CACHE_DIR = "/tmp"
MODEL_PATH = os.path.join(CACHE_DIR, "foundation-sec-8b-q4_k_m.gguf")

# Point matplotlib's config directory at a writable location.
os.environ["MPLCONFIGDIR"] = CACHE_DIR

# Download the GGUF weights once; reuse the cached copy on later runs.
if not os.path.exists(MODEL_PATH):
    logging.info("Downloading model …")
    with requests.get(MODEL_URL, stream=True) as r:
        r.raise_for_status()
        with open(MODEL_PATH, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    logging.info("Download finished.")

# Load the quantised model with a 4096-token context window.
llm = Llama(model_path=MODEL_PATH, n_ctx=4096, verbose=False)


def chat_fn(message, history):
    # Gradio passes the history as (user, assistant) pairs; rebuild it as
    # OpenAI-style chat messages for llama-cpp-python.
    messages = []
    for human, ai in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": ai})
    messages.append({"role": "user", "content": message})

    out = llm.create_chat_completion(
        messages=messages,
        max_tokens=512,
        temperature=0.7,
        stream=False,
    )
    return out["choices"][0]["message"]["content"]


demo = gr.ChatInterface(chat_fn, title="Foundation-Sec-8B")
demo.launch(server_name="0.0.0.0", server_port=7860)