# Hugging Face Space: OpenAI-compatible chat proxy (Spaces page chrome removed).
import os
from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse
from huggingface_hub import InferenceClient
import json
import asyncio  # NOTE(review): not used in the visible code — confirm before removing

app = FastAPI()

# Get your token from Hugging Face Secrets (Settings > Secrets)
HF_TOKEN = os.getenv("HF_TOKEN")  # may be None -> anonymous, rate-limited access

# Model choice (e.g., "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct")
MODEL_ID = "Qwen/Qwen2.5-Coder-32B-Instruct" #"deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"

# Shared inference client used by both the streaming and non-streaming paths.
client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
# NOTE(review): no route decorator is visible here — presumably this was
# registered as e.g. @app.get("/"); confirm against the original Space source.
def health_check():
    """Liveness probe: report that the agent is up and which model it proxies."""
    return {"status": "Agent Active", "model": MODEL_ID}
# NOTE(review): no route decorator is visible here — presumably this backed
# POST /v1/chat/completions; confirm against the original Space source.
async def chat_completions(request: Request):
    """OpenAI-compatible chat endpoint: forward the request body to the HF client.

    Honors the request's "stream" flag: returns an SSE stream when true,
    otherwise returns the provider's completion object directly.
    """
    payload = await request.json()
    history = payload.get("messages", [])

    if payload.get("stream", False):
        # Server-sent events: one JSON chunk per frame.
        return StreamingResponse(
            stream_generator(history),
            media_type="text/event-stream"
        )

    # Standard non-streaming response
    return client.chat_completion(
        messages=history,
        max_tokens=payload.get("max_tokens", 1024),
        temperature=payload.get("temperature", 0.7),
    )
async def stream_generator(messages):
    """Generates an OpenAI-compatible SSE stream.

    Yields "data: {json}\n\n" frames mirroring OpenAI's chat.completion.chunk
    wire format, terminated by a "data: [DONE]" sentinel.

    NOTE(review): client.chat_completion(stream=True) is a blocking iterator
    consumed inside an async generator, so the event loop is tied up while
    waiting on the network — consider a plain sync generator (StreamingResponse
    runs those in a threadpool) or asyncio.to_thread; confirm before changing.
    """
    for chunk in client.chat_completion(
        messages=messages,
        max_tokens=2048,
        stream=True,
    ):
        # Some providers emit keep-alive/usage chunks with an empty choices
        # list; skip them rather than raise IndexError mid-stream.
        if not chunk.choices:
            continue
        choice = chunk.choices[0]
        # Format the chunk to look like OpenAI's wire format. The final chunk
        # carries content=None; coerce to "" so we never emit "content": null,
        # which strict OpenAI clients (expecting a string delta) reject.
        data = {
            "id": "chatcmpl-custom",
            "object": "chat.completion.chunk",
            "choices": [{
                "delta": {"content": choice.delta.content or ""},
                "finish_reason": choice.finish_reason,
                "index": 0,
            }],
        }
        yield f"data: {json.dumps(data)}\n\n"
    yield "data: [DONE]\n\n"
if __name__ == "__main__":
    # Local/dev entry point; Hugging Face Spaces serves the app on port 7860.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)