import hmac
import os
import subprocess
import time
from typing import Optional

import gradio as gr
from fastapi import FastAPI, Header, HTTPException
from pydantic import BaseModel
|
|
# GGUF model file name on local disk, and the URL it is downloaded from.
MODEL_FILE = "Dolphin-X1-8B.Q4_K_M.gguf"
MODEL_URL = (
    "https://huggingface.co/dphn/Dolphin-X1-8B-GGUF"
    "/resolve/main/Dolphin-X1-8B.Q4_K_M.gguf"
)

# Shared secret that callers of the HTTP API must send; None when the
# SPACE_API_KEY environment variable is not set.
SPACE_API_KEY = os.environ.get("SPACE_API_KEY")


# CMake build directory for llama.cpp.
BUILD_DIR = "llama"
# Thread count, kept as a string because it is passed straight to command lines
# ("-j" for the build, "-t" for inference).
THREADS = "4"
|
|
def setup():
    """Download the model and build llama.cpp unless they are already present.

    Three steps run in order, each skipped when its output already exists:
    download ``MODEL_FILE`` from ``MODEL_URL``, shallow-clone llama.cpp into
    ``llama.cpp``, and configure + build it with CMake into ``BUILD_DIR``.

    Raises:
        OSError: if ``wget``, ``git`` or ``cmake`` cannot be started.
        subprocess.CalledProcessError: if any of those commands exits non-zero.
    """
    if not os.path.exists(MODEL_FILE):
        # wget creates its -O target even when the transfer fails, which would
        # make a later run treat a truncated file as a finished download.
        # Download to a side file and move it into place only on success.
        partial = MODEL_FILE + ".part"
        try:
            subprocess.run(["wget", "-q", MODEL_URL, "-O", partial], check=True)
        except (OSError, subprocess.CalledProcessError):
            if os.path.exists(partial):
                os.remove(partial)
            raise
        os.replace(partial, MODEL_FILE)

    if not os.path.exists("llama.cpp"):
        subprocess.run(
            ["git", "clone", "--depth", "1", "https://github.com/ggerganov/llama.cpp"],
            check=True,
        )

    # The build directory alone does not prove the build finished: CMake
    # creates it during configure, before anything is compiled. Gate on the CLI
    # binary instead. "main" is the historical name; "llama-cli" is what newer
    # llama.cpp checkouts produce.
    cli_candidates = [
        os.path.join(BUILD_DIR, "bin", name) for name in ("main", "llama-cli")
    ]
    if not any(os.path.exists(path) for path in cli_candidates):
        subprocess.run(["cmake", "-S", "llama.cpp", "-B", BUILD_DIR], check=True)
        subprocess.run(
            ["cmake", "--build", BUILD_DIR, "--config", "Release", "-j", THREADS],
            check=True,
        )
|
|
# Runs at import time, before the application object is created.
setup()


# FastAPI application that the /api/chat route and the Gradio UI attach to.
app = FastAPI()
|
|
class Query(BaseModel):
    # Request body for POST /api/chat.
    q: str  # prompt text, handed to the model binary as the "-p" argument
|
|
def _llama_binary():
    """Return the path of the llama.cpp CLI binary under ``BUILD_DIR``.

    ``bin/main`` is used whenever it exists, or when neither name exists.
    ``bin/llama-cli``, the name newer llama.cpp builds give the same tool, is
    used only when it is the one present.
    """
    legacy = f"./{BUILD_DIR}/bin/main"
    renamed = f"./{BUILD_DIR}/bin/llama-cli"
    if not os.path.exists(legacy) and os.path.exists(renamed):
        return renamed
    return legacy


def _run_model(prompt):
    """Run one completion for *prompt* and return the process's stdout as text.

    Raises:
        OSError: if the binary cannot be started.
        ValueError: if *prompt* cannot be passed as an argument (e.g. it
            contains a NUL character).
        subprocess.CalledProcessError: if the binary exits non-zero.
    """
    # subprocess.run waits for the child, so it is reaped and its pipe closed.
    # stdin is detached so the child can never block waiting for terminal input.
    finished = subprocess.run(
        [_llama_binary(), "-m", MODEL_FILE, "-p", prompt, "-n", "200", "-t", THREADS],
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        check=True,
    )
    return finished.stdout.decode(errors="ignore")


@app.post("/api/chat")
def chat(q: Query, x_api_key: Optional[str] = Header(None)):
    """Answer an authenticated chat request with the model's output.

    Args:
        q: request body; ``q.q`` is the prompt.
        x_api_key: value of the ``X-API-Key`` request header, if sent.

    Returns:
        ``{"reply": <model stdout>}``.

    Raises:
        HTTPException: 401 when the key is missing, wrong, or no key is
            configured on the server; 500 when the model process fails.
    """
    # A plain `x_api_key != SPACE_API_KEY` lets everyone in when the server key
    # is unset and the header is absent (None == None). Require both sides to
    # be present, and compare in constant time.
    authorized = (
        bool(SPACE_API_KEY)
        and x_api_key is not None
        and hmac.compare_digest(x_api_key.encode(), SPACE_API_KEY.encode())
    )
    if not authorized:
        raise HTTPException(401, "Unauthorized")

    try:
        reply = _run_model(q.q)
    except (OSError, ValueError, subprocess.CalledProcessError) as exc:
        raise HTTPException(500, "Model inference failed") from exc
    return {"reply": reply}


def ui_chat(q):
    """Gradio handler: return the model's output for prompt *q*.

    Raises:
        gr.Error: when the model process cannot be run or exits non-zero, so
            the UI shows a message instead of an empty answer.
    """
    try:
        return _run_model(q)
    except (OSError, ValueError, subprocess.CalledProcessError) as exc:
        raise gr.Error("Model inference failed") from exc
|
|
# Attach a text-in / text-out Gradio interface for ui_chat at the root path.
gr.mount_gradio_app(
    app,
    gr.Interface(fn=ui_chat, inputs=gr.Textbox(), outputs=gr.Textbox()),
    path="/",
)