import os

import requests
from fastapi import FastAPI
from llama_cpp import Llama

# Use all available CPU cores for inference; fall back to 2 if undetectable.
threads = os.cpu_count() or 2

# Download the quantized Gemma weights from the Hugging Face Hub (cached
# locally after the first run) and load them.
llm = Llama.from_pretrained(
    repo_id="google/gemma-3-1b-it-GGUF",
    filename="*q4_k_m.gguf",  # glob matching the Q4_K_M quantized file
    n_ctx=2048,
    n_threads=threads,
    verbose=False,
)

main = FastAPI()

def web_search(query: str) -> str:
    """Fetch a short abstract from the DuckDuckGo Instant Answer API."""
    try:
        response = requests.get(
            "https://api.duckduckgo.com/",
            params={"q": query, "format": "json"},  # requests URL-encodes the query
            timeout=5,
        ).json()
        return response.get("AbstractText") or "No data."
    except (requests.RequestException, ValueError):
        return "Search failed."


@main.post("/v1/chat")
async def chat(data: dict):
    user_query = data.get("message", "")

    system_instr = (
        "You are Inachi AI, developed by the Inachi Team. "
        "You are an expert system architect."
    )

    # Only call the search helper when the user explicitly mentions "search".
    search_context = ""
    if "search" in user_query.lower():
        search_context = f"\nContext: {web_search(user_query)}"

    # Gemma's chat template defines no system role, so the system instructions
    # are folded into the user turn. The literal "<bos>" is omitted because
    # llama-cpp-python already prepends the BOS token during tokenization.
    prompt = (
        "<start_of_turn>user\n"
        f"{system_instr}{search_context}\n\n{user_query}<end_of_turn>\n"
        "<start_of_turn>model\n"
    )

    output = llm(
        prompt,
        max_tokens=512,
        stop=["<end_of_turn>"],
        echo=False,
    )

    reply = output["choices"][0]["text"].strip()
    return {"reply": reply}


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(main, host="0.0.0.0", port=7860)
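

# Example request once the server is running (port 7860, as configured above):
#
#   curl -X POST http://localhost:7860/v1/chat \
#        -H "Content-Type: application/json" \
#        -d '{"message": "search REST API design basics"}'
#
# The endpoint responds with JSON of the form {"reply": "..."}.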