ThongCoding committed
Commit 062433d · 1 Parent(s): d6bf7fd
Files changed (1):
  1. app.py +21 -27
app.py CHANGED
@@ -1,51 +1,45 @@
-from fastapi import FastAPI, Request
+from fastapi import FastAPI
 from pydantic import BaseModel
 from llama_cpp import Llama
-from huggingface_hub import hf_hub_download
 import os
+import requests
 
 app = FastAPI()
 
-# === Configuration ===
-REPO_ID = "google/gemma-2b-it-GGUF"
-FILENAME = "gemma-2b-it.gguf"
-HF_TOKEN = os.environ.get("HF_TOKEN")
+# === Constants ===
+MODEL_REPO = "google/gemma-2b-it-GGUF"
+MODEL_FILE = "gemma-2b-it.gguf"
+MODEL_URL = f"https://huggingface.co/{MODEL_REPO}/resolve/main/{MODEL_FILE}"
 MODEL_DIR = "./models"
-CACHE_DIR = "./models/.hf_cache"
-MODEL_PATH = os.path.join(MODEL_DIR, FILENAME)
+MODEL_PATH = os.path.join(MODEL_DIR, MODEL_FILE)
 
-# === Ensure directories exist ===
+HF_TOKEN = os.getenv("HF_TOKEN")
+
+# === Create model directory ===
 os.makedirs(MODEL_DIR, exist_ok=True)
-os.makedirs(CACHE_DIR, exist_ok=True)
 
-# === Download model if not exists ===
+# === Manual download of GGUF ===
 if not os.path.exists(MODEL_PATH):
-    try:
-        print("📦 Downloading model from Hugging Face Hub...")
-        hf_hub_download(
-            repo_id=REPO_ID,
-            filename=FILENAME,
-            token=HF_TOKEN,
-            cache_dir=CACHE_DIR,
-            local_dir=MODEL_DIR,
-            local_dir_use_symlinks=False,
-        )
-        print(f"✅ Model downloaded to {MODEL_PATH}")
-    except Exception as e:
-        print(f"❌ Download failed: {e}")
-        raise
+    print("📦 Downloading GGUF model manually from Hugging Face...")
+    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
+    response = requests.get(MODEL_URL, headers=headers, stream=True)
+    if response.status_code != 200:
+        raise RuntimeError(f"❌ Failed to download model. Status: {response.status_code}")
+    with open(MODEL_PATH, "wb") as f:
+        for chunk in response.iter_content(chunk_size=8192):
+            f.write(chunk)
+    print(f"✅ Model downloaded to {MODEL_PATH}")
 
 # === Load model ===
 print("🔧 Loading GGUF model...")
 llm = Llama(model_path=MODEL_PATH, n_ctx=512)
 
-# === Request schema ===
+# === Inference ===
 class PromptRequest(BaseModel):
     prompt: str
     max_tokens: int = 256
     temperature: float = 0.7
 
-# === Inference endpoint ===
 @app.post("/prompt")
 def generate_prompt(req: PromptRequest):
     output = llm(
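
For reference, a minimal client-side sketch of calling the /prompt endpoint defined in this file once the app is running. The base URL and port (7860, the port Hugging Face Spaces typically exposes) are assumptions, and the exact keys in the JSON response depend on how generate_prompt packages the llama_cpp output, which is truncated in this diff.

    import requests

    # Assumed base URL; adjust to the deployed Space URL if not running locally.
    API_URL = "http://localhost:7860/prompt"

    payload = {
        "prompt": "Explain what a GGUF file is in one sentence.",
        "max_tokens": 128,    # optional, PromptRequest defaults to 256
        "temperature": 0.7,   # optional, PromptRequest defaults to 0.7
    }

    resp = requests.post(API_URL, json=payload, timeout=300)
    resp.raise_for_status()
    # llama_cpp's Llama.__call__ returns a dict with a "choices" list; the shape
    # of this response depends on how generate_prompt returns that output.
    print(resp.json())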