from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os
REPO_ID = "google/gemma-2b-it-GGUF"
FILENAME = "gemma-2b-it.gguf"
HF_TOKEN = os.environ.get("HF_TOKEN")
MODEL_DIR = "./models"
CACHE_DIR = "./models/.hf_cache"
MODEL_PATH = os.path.join(MODEL_DIR, FILENAME)
# Make sure directories exist
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(CACHE_DIR, exist_ok=True)
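# Step 1: Download the GGUF file if it is not already present (later restarts reuse the local copy)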
if not os.path.exists(MODEL_PATH):
    try:
        print("📦 Downloading model from Hugging Face Hub...")
        hf_hub_download(
            repo_id=REPO_ID,
            filename=FILENAME,
            token=HF_TOKEN,
            cache_dir=CACHE_DIR,
            local_dir=MODEL_DIR,
            local_dir_use_symlinks=False,  # deprecated (ignored by recent huggingface_hub), kept for older versions
        )
        print(f"✅ Model downloaded to {MODEL_PATH}")
    except Exception as e:
        print(f"❌ Download failed: {e}")
        raise
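# Note: hf_hub_download also returns the resolved local path; with local_dir set
# as above it should match MODEL_PATH, keeping the existence check consistent.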
# Step 2: Load model using llama-cpp-python
print("🤖 Loading GGUF model...")
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=512,
    n_threads=4,
    n_batch=512,
    verbose=False,
)
# Step 3: FastAPI app
app = FastAPI()
class PromptRequest(BaseModel):
    prompt: str
@app.post("/prompt")
def generate_prompt(req: PromptRequest):
    prompt = req.prompt.strip()
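    # Optional refinement (assumption, not in the original): gemma-2b-it is
    # instruction-tuned, so wrapping the prompt in Gemma's chat-turn format
    # usually improves replies:
    # prompt = f"<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n"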
    output = llm(
        prompt,
        max_tokens=512,
        temperature=0.6,
        top_p=0.95,
        stop=["<|endoftext|>", "</s>", "```"],
        echo=False,
    )
    result = output["choices"][0]["text"].strip()
    return {"response": result}
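# Local run sketch (assumes this file is named app.py and uvicorn is installed;
# port 7860 is the Hugging Face Spaces convention, adjust as needed):
#
#   uvicorn app:app --host 0.0.0.0 --port 7860
#
# Example request:
#
#   curl -X POST http://localhost:7860/prompt \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Explain GGUF in one sentence."}'
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)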