import os
import time
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
from fastapi import FastAPI
from pydantic import BaseModel
from threading import Thread
import uvicorn
# ----------------------------
# Model
# ----------------------------
model_path = hf_hub_download(
    repo_id="bartowski/Qwen2.5-Coder-0.5B-Instruct-abliterated-GGUF",
    filename="Qwen2.5-Coder-0.5B-Instruct-abliterated-f16.gguf",
)
llm = Llama(
    model_path=model_path,
    n_ctx=4096,                # context window size
    n_threads=os.cpu_count(),  # use all available CPU cores
    n_batch=512,               # prompt-processing batch size
    n_gpu_layers=0,            # CPU-only inference
    verbose=False,
)
llm("warmup", max_tokens=1)
# ----------------------------
# System Prompt
# ----------------------------
SYSTEM_PROMPT = """
You are an advanced AI assistant.
Answer questions clearly and concisely.
You can handle multi-turn conversations and provide detailed responses if needed.
"""
# ----------------------------
# Chat Function
# ----------------------------
def generate_response(message, history):
    yield "🤖 Thinking..."
    time.sleep(0.5)
    # Build a ChatML-style prompt from the system prompt and the chat history.
    prompt = f"<|im_start|>system\n{SYSTEM_PROMPT}<|im_end|>\n"
    for h in history:
        # Gradio's "messages" format: {"role": ..., "content": ...}
        if isinstance(h, dict) and "role" in h:
            role = h["role"]
            msg = h.get("content", h.get("message", ""))
            if role == "user":
                prompt += f"<|im_start|>user\n{msg}<|im_end|>\n"
            else:
                prompt += f"<|im_start|>assistant\n{msg}<|im_end|>\n"
        # Legacy tuple format: (user_message, assistant_message)
        elif isinstance(h, (list, tuple)) and len(h) >= 2:
            u, a = h[0], h[1]
            prompt += f"<|im_start|>user\n{u}<|im_end|>\n<|im_start|>assistant\n{a}<|im_end|>\n"
    prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
    output = ""
    # Stream tokens as they arrive so the UI updates incrementally.
    for token in llm(prompt, max_tokens=2048, temperature=0.2, top_p=0.9, repeat_penalty=1.1, stream=True):
        output += token["choices"][0]["text"]
        yield output
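# For reference, a one-turn conversation renders to a ChatML-style prompt
# shaped like this (content abbreviated), and the model completes the
# final assistant turn:
#
#     <|im_start|>system
#     You are an advanced AI assistant. ...<|im_end|>
#     <|im_start|>user
#     Explain this function<|im_end|>
#     <|im_start|>assistant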
# ----------------------------
# FastAPI API
# ----------------------------
app = FastAPI()
class ChatRequest(BaseModel):
    message: str
    history: list = []
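# Example request body (a sketch; history entries are [user, assistant] pairs):
#
#     {"message": "What does this regex do?", "history": [["hi", "hello"]]}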
# Keep the FastAPI endpoint simple so that a web-search client can connect to it.
@app.post("/generate_response")  # path chosen to match the client
def chat_endpoint(request: ChatRequest):
    # Build the same ChatML-style prompt used by the Gradio chat function.
    prompt = f"<|im_start|>system\n{SYSTEM_PROMPT}<|im_end|>\n"
    for h in request.history:
        if isinstance(h, (list, tuple)) and len(h) >= 2:
            prompt += f"<|im_start|>user\n{h[0]}<|im_end|>\n<|im_start|>assistant\n{h[1]}<|im_end|>\n"
    prompt += f"<|im_start|>user\n{request.message}<|im_end|>\n<|im_start|>assistant\n"
    # Keep streaming off for the API so the full text is returned in one response.
    res = llm(prompt, max_tokens=1024, temperature=0.3)
    return res["choices"][0]["text"]
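# A minimal client sketch, assuming the server is reachable on localhost:8000
# (the `requests` dependency is not used elsewhere in this app):
#
#     import requests
#     r = requests.post(
#         "http://localhost:8000/generate_response",
#         json={"message": "Explain this code", "history": []},
#     )
#     print(r.json())  # the endpoint returns the generated text as a JSON string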
# ----------------------------
# Gradio UI
# ----------------------------
# Rounded corners for the main container and the chat bubbles.
# Passed to gr.Blocks at construction so the styles are applied reliably.
CUSTOM_CSS = """
.gradio-container {
    border-radius: 25px !important;
    max-width: 600px !important;
    margin: auto !important;
    overflow: hidden;
}
.message.user { border-radius: 18px 18px 4px 18px !important; }
.message.bot { border-radius: 18px 18px 18px 4px !important; }
"""
with gr.Blocks(theme=gr.Theme.from_hub("JackismyShephard/ultimate-rvc-theme"), css=CUSTOM_CSS) as demo:
    gr.HTML("<h2 style='text-align:center; color:white;'>Code Explainer AI</h2>")
    chatbot = gr.ChatInterface(
        fn=generate_response,
        chatbot=gr.Chatbot(height=600),
        textbox=gr.Textbox(placeholder="Paste code or ask for explanation...", container=False),
    )
# ----------------------------
# Run Gradio + FastAPI together
# ----------------------------
def run_gradio():
    demo.launch(server_name="0.0.0.0", server_port=7860)

if __name__ == "__main__":
    # Serve the Gradio UI on port 7860 in a background thread and
    # the FastAPI app on port 8000 in the main thread.
    Thread(target=run_gradio, daemon=True).start()
    uvicorn.run(app, host="0.0.0.0", port=8000)
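# Alternative sketch: serve the UI and the API on a single port by mounting
# the Gradio app onto FastAPI (useful on hosts that expose only one port):
#
#     app = gr.mount_gradio_app(app, demo, path="/")
#     uvicorn.run(app, host="0.0.0.0", port=7860)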