import os
import time
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
from fastapi import FastAPI
from pydantic import BaseModel
from threading import Thread
import uvicorn
# ----------------------------
# Model
# ----------------------------
model_path = hf_hub_download(
    repo_id="bartowski/Qwen2.5-Coder-0.5B-Instruct-abliterated-GGUF",
    filename="Qwen2.5-Coder-0.5B-Instruct-abliterated-f16.gguf"
)
llm = Llama(
    model_path=model_path,
    n_ctx=4096,
    n_threads=os.cpu_count(),
    n_batch=512,
    n_gpu_layers=0,  # CPU-only inference
    verbose=False
)
llm("warmup", max_tokens=1)  # one-token warm-up so the first real request is not slowed by initialization
# ----------------------------
# System Prompt
# ----------------------------
SYSTEM_PROMPT = """
You are an advanced AI assistant.
Answer questions clearly and concisely.
You can handle multi-turn conversations and provide detailed responses if needed.
"""
| # ---------------------------- | |
| # Chat Function | |
| # ---------------------------- | |
| def generate_response(message, history): | |
| yield "🤖 Thinking..." | |
| time.sleep(0.5) | |
| prompt = f"<|im_start|>system\n{SYSTEM_PROMPT}<|im_end|>\n" | |
| for h in history: | |
| if isinstance(h, dict) and "role" in h and "message" in h: | |
| role = h["role"] | |
| msg = h["message"] | |
| if role == "user": | |
| prompt += f"<|im_start|>user\n{msg}<|im_end|>\n" | |
| else: | |
| prompt += f"<|im_start|>assistant\n{msg}<|im_end|>\n" | |
| elif isinstance(h, (list, tuple)) and len(h) >= 2: | |
| u, a = h[0], h[1] | |
| prompt += f"<|im_start|>user\n{u}<|im_end|>\n<|im_start|>assistant\n{a}<|im_end|>\n" | |
| prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n" | |
| output = "" | |
| for token in llm(prompt, max_tokens=2048, temperature=0.2, top_p=0.9, repeat_penalty=1.1, stream=True): | |
| output += token["choices"][0]["text"] | |
| yield output | |
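# Illustrative check of the ChatML rendering (values made up for the example):
#
#   build_prompt("Hi", [("Hello", "Hi there!")])
#
# yields, in order: the system block, one user/assistant pair from history,
# the new user message, and an open assistant block for the model to complete:
#
#   <|im_start|>system
#   ...SYSTEM_PROMPT...<|im_end|>
#   <|im_start|>user
#   Hello<|im_end|>
#   <|im_start|>assistant
#   Hi there!<|im_end|>
#   <|im_start|>user
#   Hi<|im_end|>
#   <|im_start|>assistant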
| # ---------------------------- | |
| # FastAPI API | |
| # ---------------------------- | |
| app = FastAPI() | |
| class ChatRequest(BaseModel): | |
| message: str | |
| history: list = [] | |
| # FastAPI endpoint ko simple banayein taaki Web Search client connect kar sake | |
| # Path ko match karne ke liye change kiya | |
| def chat_endpoint(request: ChatRequest): | |
| output = "" | |
| # Prompt logic (Same as yours) | |
| prompt = f"<|im_start|>system\n{SYSTEM_PROMPT}<|im_end|>\n" | |
| # ... baki prompt logic ... | |
| # Streaming off rakhein API response ke liye taaki ek baar mein pura text mile | |
| res = llm(prompt, max_tokens=1024, temperature=0.3) | |
| return res["choices"][0]["text"] | |
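# Example client call, assuming the "/chat" route above (the path is an
# assumption, see the note on the decorator) and the server on port 8000:
#
#   import requests
#   resp = requests.post(
#       "http://localhost:8000/chat",
#       json={"message": "Explain list comprehensions", "history": []},
#   )
#   print(resp.json())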
| # ---------------------------- | |
| # Gradio UI | |
| # ---------------------------- | |
| with gr.Blocks(theme=gr.Theme.from_hub("JackismyShephard/ultimate-rvc-theme")) as demo: | |
| gr.HTML("<h2 style='text-align:center; color:white;'>Code Explainer AI</h2>") | |
| chatbot = gr.ChatInterface( | |
| fn=generate_response, | |
| chatbot=gr.Chatbot(height=600), | |
| textbox=gr.Textbox(placeholder="Paste code or ask for explanation...", container=False) | |
| ) | |
| # Rounded corners for main container | |
| demo.css = """ | |
| .gradio-container { | |
| border-radius: 25px !important; | |
| max-width: 600px !important; | |
| margin: auto !important; | |
| overflow: hidden; | |
| } | |
| .message.user { border-radius: 18px 18px 4px 18px !important; } | |
| .message.bot { border-radius: 18px 18px 18px 4px !important; } | |
| """ | |
| # ---------------------------- | |
| # Run Gradio + FastAPI together | |
| # ---------------------------- | |
| def run_gradio(): | |
| demo.launch(server_name="0.0.0.0", server_port=7860) | |
| thread = Thread(target=run_gradio) | |
| thread.start() | |
| if __name__ == "__main__": | |
| uvicorn.run(app, host="0.0.0.0", port=8000) |