Commit · 062433d
1 Parent(s): d6bf7fd
asdwd
app.py CHANGED
@@ -1,51 +1,45 @@
-from fastapi import FastAPI
+from fastapi import FastAPI
 from pydantic import BaseModel
 from llama_cpp import Llama
-from huggingface_hub import hf_hub_download
 import os
+import requests
 
 app = FastAPI()
 
-# ===
-
-
-
+# === Constants ===
+MODEL_REPO = "google/gemma-2b-it-GGUF"
+MODEL_FILE = "gemma-2b-it.gguf"
+MODEL_URL = f"https://huggingface.co/{MODEL_REPO}/resolve/main/{MODEL_FILE}"
 MODEL_DIR = "./models"
-
-MODEL_PATH = os.path.join(MODEL_DIR, FILENAME)
+MODEL_PATH = os.path.join(MODEL_DIR, MODEL_FILE)
 
-
+HF_TOKEN = os.getenv("HF_TOKEN")
+
+# === Create model directory ===
 os.makedirs(MODEL_DIR, exist_ok=True)
-os.makedirs(CACHE_DIR, exist_ok=True)
 
-# ===
+# === Manual download of GGUF ===
 if not os.path.exists(MODEL_PATH):
-
-
-
-
-
-
-
-
-
-        )
-        print(f"✅ Model downloaded to {MODEL_PATH}")
-    except Exception as e:
-        print(f"❌ Download failed: {e}")
-        raise
+    print("📦 Downloading GGUF model manually from Hugging Face...")
+    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
+    response = requests.get(MODEL_URL, headers=headers, stream=True)
+    if response.status_code != 200:
+        raise RuntimeError(f"❌ Failed to download model. Status: {response.status_code}")
+    with open(MODEL_PATH, "wb") as f:
+        for chunk in response.iter_content(chunk_size=8192):
+            f.write(chunk)
+    print(f"✅ Model downloaded to {MODEL_PATH}")
 
 # === Load model ===
 print("🔧 Loading GGUF model...")
 llm = Llama(model_path=MODEL_PATH, n_ctx=512)
 
-# ===
+# === Inference ===
 class PromptRequest(BaseModel):
     prompt: str
     max_tokens: int = 256
     temperature: float = 0.7
 
-# === Inference endpoint ===
 @app.post("/prompt")
 def generate_prompt(req: PromptRequest):
     output = llm(
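
The hunk ends mid-statement at output = llm(, so the rest of the endpoint is not visible in this commit. For reference only, a minimal sketch of how that call might be completed, assuming llama-cpp-python's completion-style call (which returns an OpenAI-like dict); the actual continuation of app.py may differ:

# Hypothetical completion of the /prompt endpoint (not shown in this commit's hunk).
# Assumes llama-cpp-python's completion-style call returning an OpenAI-like dict.
@app.post("/prompt")
def generate_prompt(req: PromptRequest):
    output = llm(
        req.prompt,                    # prompt text from the request body
        max_tokens=req.max_tokens,     # generation cap (default 256)
        temperature=req.temperature,   # sampling temperature (default 0.7)
    )
    return {"response": output["choices"][0]["text"]}

With a completion along those lines, POSTing a JSON body such as {"prompt": "Hello", "max_tokens": 64} to /prompt would return the generated text under "response".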