import asyncio

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Download the GGUF file from the Hugging Face Hub
model_id = "muhammadnoman76/cortex_q4"
gguf_filename = "unsloth.Q4_K_M.gguf"  # Replace with the correct filename
model_path = hf_hub_download(
    repo_id=model_id,
    filename=gguf_filename,
    local_dir=".",
    local_dir_use_symlinks=False,
)
alpaca_prompt = """
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
You are an intelligent agent that analyzes user requests and breaks them down into structured components. Your task is to:
1. Identify the specific actions needed to complete the request
2. Determine which intent-based tools would be appropriate (selecting only from the available intent list)
3. Provide brief justifications for why each intent is relevant
4. Define the high-level goals the request aims to accomplish
5. Generate a concise instruction prompt summarizing how to fulfill the request

Available intents = ["schedule", "email", "sms", "whatsapp", "web_search", "parse_document", "visualize_data", "analyze_data", "analyze_image", "gen_code", "gen_image", "calculate", "execute_code", "academic_search", "finance_news", "translation", "url", "database", "social_media"]

Important notes:
- Provide only the intent category (e.g., "email"), not specific tool names
- If you identify a needed intent that isn't in the list above, include it with "(new)" notation
- Be concise but thorough in your analysis
- Focus on practical implementation rather than theoretical discussion

### Input:
{}

### Response:
"""
# Load the model from the GGUF file downloaded above
llm = Llama(
    model_path=model_path,  # local path returned by hf_hub_download
    n_ctx=2048,
    n_batch=512,
    verbose=False,
)
async def stream_llm_response(task_description: str):
    """Yield generated text chunks as they arrive from llama.cpp."""
    prompt = alpaca_prompt.format(task_description)
    stream = llm(
        prompt,
        max_tokens=2048,
        stream=True,
    )
    for output in stream:
        yield output["choices"][0]["text"]
        # Yield control to the event loop between chunks so the
        # response can flush while generation continues.
        await asyncio.sleep(0)
# Register the handler as a route; the original code defined it without a
# decorator, so it was never reachable. The path "/" is an assumption.
@app.get("/")
async def stream_response(task: str = "make an agent which send mail by searching top 5 website from google"):
    return StreamingResponse(stream_llm_response(task), media_type="text/plain")
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
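
# --- Usage sketch ---
# Assumes the server is running on localhost:8000 with the "/" route added
# above; httpx is not a dependency of this app and is named only for
# illustration.
#
#   curl -N "http://localhost:8000/?task=plan%20a%20weekly%20report%20email"
#
#   import httpx
#   with httpx.stream("GET", "http://localhost:8000/",
#                     params={"task": "plan a weekly report email"}) as r:
#       for chunk in r.iter_text():
#           print(chunk, end="", flush=True)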