Spaces:
Sleeping
Sleeping
File size: 14,563 Bytes
86ed234 ac4ae39 86ed234 ac4ae39 86ed234 ac4ae39 86ed234 261e0ce 86ed234 ac4ae39 86ed234 ac4ae39 86ed234 ac4ae39 261e0ce a6bd5c0 261e0ce ac4ae39 261e0ce ac4ae39 7a1ebee ac4ae39 e6eaeb3 ac4ae39 261e0ce 7a1ebee ac4ae39 261e0ce 86ed234 e6eaeb3 ac4ae39 e6eaeb3 7a1ebee e6eaeb3 ac4ae39 58c90e6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 |
"""
helpers/coder.py
Single-agent code generation using NVIDIA Qwen3 Coder model with Chain of Thought reasoning.
Produces files-by-files Markdown with per-file explanations. Designed to be called from
report generation to attach code outputs to the appropriate subsection.
"""
import os
from typing import Optional
from utils.logger import get_logger
from utils.service.common import trim_text
logger = get_logger("CODER", __name__)
# Get the NVIDIA coder model from environment
NVIDIA_CODER = os.getenv("NVIDIA_CODER", "qwen/qwen3-coder-480b-a35b-instruct")
async def generate_code_artifacts(
    subsection_id: str,
    task: str,
    reasoning: str,
    context_text: str,
    web_context: str,
    gemini_rotator,
    nvidia_rotator,
    user_id: str = ""
) -> str:
    """Generate code (files-by-files) with explanations using NVIDIA Qwen3 Coder with CoT reasoning.

    Two-step workflow:
    1. NVIDIA_LARGE analyzes and enhances the task requirements.
    2. NVIDIA_CODER generates the actual code based on the enhanced requirements.

    Args:
        subsection_id: Identifier of the report subsection the code attaches to.
        task: Original coding task description.
        reasoning: Rationale that accompanies the task.
        context_text: Document-derived context (trimmed before prompting).
        web_context: Web-search context (trimmed before prompting).
        gemini_rotator: Unused here; kept for call-site compatibility.
        nvidia_rotator: API-key rotator passed to the NVIDIA completion calls.
        user_id: Optional user id; enables best-effort analytics tracking.

    Returns:
        A Markdown string containing multiple code blocks. Each block is
        preceded by a heading like `File: path` and followed by a short
        explanation. Falls back to a plain message on empty model output.
    """
    from utils.api.router import nvidia_large_chat_completion
    logger.info(f"[CODER] Starting enhanced code generation for subsection {subsection_id} (task='{task[:60]}...')")

    # Analytics is strictly best-effort. Initialize tracker to None up front so
    # the later `if tracker and user_id` checks cannot raise NameError when the
    # analytics import below fails (previously that NameError was silently
    # swallowed by the surrounding except blocks).
    tracker = None
    try:
        from utils.analytics import get_analytics_tracker
        tracker = get_analytics_tracker()
        if tracker and user_id:
            await tracker.track_agent_usage(
                user_id=user_id,
                agent_name="coding",
                action="code",
                context="report_coding",
                metadata={"subsection_id": subsection_id, "model": NVIDIA_CODER}
            )
    except Exception:
        pass

    # Step 1: Use NVIDIA_LARGE to analyze and enhance the task requirements.
    logger.info(f"[CODER] Step 1: Analyzing task with NVIDIA_LARGE for subsection {subsection_id}")
    analysis_system_prompt = (
        "You are a senior software architect and technical lead. Your task is to analyze a coding requirement "
        "and provide a comprehensive, enhanced specification that will be used by a code generation AI.\n\n"
        "ANALYSIS REQUIREMENTS:\n"
        "1. Break down the task into clear, actionable components\n"
        "2. Identify potential technical challenges and solutions\n"
        "3. Suggest appropriate technologies, frameworks, and patterns\n"
        "4. Define clear requirements and constraints\n"
        "5. Identify dependencies and relationships between components\n"
        "6. Consider scalability, maintainability, and best practices\n\n"
        "OUTPUT FORMAT:\n"
        "Provide a structured analysis in the following format:\n"
        "- **Task Analysis**: Clear breakdown of what needs to be implemented\n"
        "- **Technical Requirements**: Specific technical specifications\n"
        "- **Architecture Suggestions**: Recommended structure and patterns\n"
        "- **Dependencies**: Required libraries, frameworks, or external services\n"
        "- **Implementation Notes**: Key considerations for the implementation\n"
        "- **Enhanced Task Description**: A refined, detailed task description for code generation"
    )
    analysis_user_prompt = (
        f"ORIGINAL TASK: {task}\n"
        f"ORIGINAL REASONING: {reasoning}\n"
        f"SUBSECTION: {subsection_id}\n\n"
        f"CONTEXT (DOCUMENT):\n{trim_text(context_text or '', 8000)}\n\n"
        f"CONTEXT (WEB):\n{trim_text(web_context or '', 4000)}\n\n"
        "Please analyze this coding task and provide a comprehensive enhancement that will guide the code generation process."
    )
    try:
        enhanced_analysis = await nvidia_large_chat_completion(analysis_system_prompt, analysis_user_prompt, nvidia_rotator)
        logger.info(f"[CODER] Task analysis completed for subsection {subsection_id}")
        # Track NVIDIA_LARGE usage (best-effort).
        try:
            if tracker and user_id:
                await tracker.track_model_usage(
                    user_id=user_id,
                    model_name=os.getenv("NVIDIA_LARGE", "openai/gpt-oss-120b"),
                    provider="nvidia_large",
                    context="code_analysis",
                    metadata={"subsection_id": subsection_id}
                )
        except Exception:
            pass
    except Exception as e:
        # Fall back to a minimal analysis built from the raw task/reasoning so
        # Step 2 can still proceed.
        logger.warning(f"[CODER] Task analysis failed for subsection {subsection_id}: {e}")
        enhanced_analysis = f"**Task Analysis**: {task}\n**Technical Requirements**: {reasoning}\n**Enhanced Task Description**: {task}"

    # Step 2: Use NVIDIA_CODER to generate code based on the enhanced analysis.
    logger.info(f"[CODER] Step 2: Generating code with NVIDIA_CODER for subsection {subsection_id}")
    # System prompt steers the coder model through an explicit Chain of Thought.
    system_prompt = (
        "You are a senior software engineer with expertise in code generation and architecture design.\n"
        "Your task is to generate production-quality code based on the ENHANCED ANALYSIS provided below.\n\n"
        "REASONING PROCESS (Chain of Thought):\n"
        "1. First, analyze the enhanced requirements and constraints\n"
        "2. Identify the key components and their relationships\n"
        "3. Consider the context and any existing patterns or frameworks\n"
        "4. Plan the code structure and architecture\n"
        "5. Generate clean, maintainable code with proper error handling\n"
        "6. Ensure code follows best practices and is production-ready\n\n"
        "OUTPUT FORMAT:\n"
        "- Output Markdown with multiple code blocks by file, each preceded by a short heading 'File: path'\n"
        "- Prefer clear, minimal dependencies\n"
        "- After each code block, add a concise explanation of design decisions\n"
        "- Ensure coherent naming and imports across files\n"
        "- If mentioning endpoints/APIs, ensure consistency across files\n"
        "- Do not include meta text like 'Here is the code'. Start with the first file heading\n"
        "- Include proper error handling, documentation, and testing considerations\n"
    )
    # User prompt carries the Step-1 analysis plus (smaller) context windows.
    user_prompt = (
        f"SUBSECTION: {subsection_id}\n"
        f"ENHANCED ANALYSIS:\n{enhanced_analysis}\n\n"
        f"ORIGINAL CONTEXT (DOCUMENT):\n{trim_text(context_text or '', 6000)}\n\n"
        f"ORIGINAL CONTEXT (WEB):\n{trim_text(web_context or '', 3000)}\n\n"
        "Please follow this reasoning process:\n"
        "1. Analyze the enhanced requirements and identify what needs to be implemented\n"
        "2. Consider the provided context and any relevant patterns or frameworks\n"
        "3. Plan the code structure, including file organization and dependencies\n"
        "4. Generate clean, production-ready code with proper error handling\n"
        "5. Ensure code follows best practices and is maintainable\n\n"
        "Produce the code files and explanations as specified."
    )

    code_md = await nvidia_coder_completion(system_prompt, user_prompt, nvidia_rotator, user_id, "coding")
    code_md = (code_md or "").strip()

    # Track NVIDIA_CODER usage (best-effort).
    try:
        if tracker and user_id:
            await tracker.track_model_usage(
                user_id=user_id,
                model_name=os.getenv("NVIDIA_CODER", "qwen/qwen3-coder-480b-a35b-instruct"),
                provider="nvidia_coder",
                context="report_coding",
                metadata={"subsection_id": subsection_id}
            )
    except Exception:
        pass

    if not code_md:
        logger.warning(f"[CODER] Empty code output for subsection {subsection_id}")
        return "Code generation produced no content."
    # Light post-check: ensure at least one fenced code block is present.
    if "```" not in code_md:
        logger.warning(f"[CODER] No code fences detected for subsection {subsection_id}")
    else:
        logger.info(f"[CODER] Code fences detected for subsection {subsection_id}")
    return code_md
async def nvidia_coder_completion(system_prompt: str, user_prompt: str, nvidia_rotator, user_id: Optional[str] = None, context: str = "") -> str:
    """
    Call the NVIDIA Qwen3 Coder chat-completions endpoint and return the text.

    The request is sent with "stream": True and the SSE body is parsed line
    by line for delta content. NOTE(review): `client.post` buffers the whole
    response before `aiter_lines` iterates it, so this is not true incremental
    streaming — switch to `client.stream(...)` if live tokens are ever needed.

    Args:
        system_prompt: System message for the chat completion.
        user_prompt: User message for the chat completion.
        nvidia_rotator: Key rotator; rotated once on 401/403/429/5xx.
        user_id: Optional user id for best-effort analytics tracking.
        context: Analytics context label.

    Returns:
        The accumulated completion text, or a fallback message on error/empty.
    """
    # Hoisted out of the chunk loop: importing per SSE line is wasteful.
    import json
    import httpx

    # Analytics is best-effort and must never break the completion path.
    try:
        from utils.analytics import get_analytics_tracker
        tracker = get_analytics_tracker()
        if tracker and user_id:
            await tracker.track_model_usage(
                user_id=user_id,
                model_name=os.getenv("NVIDIA_CODER", "qwen/qwen3-coder-480b-a35b-instruct"),
                provider="nvidia_coder",
                context=context or "nvidia_coder_completion",
                metadata={"system_prompt_length": len(system_prompt), "user_prompt_length": len(user_prompt)}
            )
    except Exception as e:
        logger.debug(f"[CODER] Analytics tracking failed: {e}")

    key = nvidia_rotator.get_key() or ""
    url = "https://integrate.api.nvidia.com/v1/chat/completions"
    payload = {
        "model": NVIDIA_CODER,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        "temperature": 0.7,
        "top_p": 0.8,
        "max_tokens": 4096,
        "stream": True
    }
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {key}"}
    logger.info(f"[NVIDIA_CODER] API call - Model: {NVIDIA_CODER}, Key present: {bool(key)}")
    logger.info(f"[NVIDIA_CODER] System prompt length: {len(system_prompt)}, User prompt length: {len(user_prompt)}")
    try:
        async with httpx.AsyncClient(timeout=120) as client:  # Longer timeout for code generation
            response = await client.post(url, headers=headers, json=payload)
            # On auth/rate-limit/server errors, rotate the key and retry once.
            if response.status_code in (401, 403, 429) or (500 <= response.status_code < 600):
                logger.warning(f"HTTP {response.status_code} from NVIDIA Coder provider. Rotating key and retrying")
                nvidia_rotator.rotate()
                key = nvidia_rotator.get_key() or ""
                headers = {"Content-Type": "application/json", "Authorization": f"Bearer {key}"}
                response = await client.post(url, headers=headers, json=payload)
            response.raise_for_status()

            # Accumulate delta content from the SSE "data: ..." lines.
            content = ""
            async for line in response.aiter_lines():
                if not line.startswith("data: "):
                    continue
                data = line[6:]  # Remove "data: " prefix
                if data.strip() == "[DONE]":
                    break
                try:
                    chunk_data = json.loads(data)
                except json.JSONDecodeError:
                    continue
                choices = chunk_data.get("choices") or []
                if not choices:
                    continue
                delta = choices[0].get("delta", {})
                # Chain-of-thought reasoning tokens are logged, not returned.
                reasoning = delta.get("reasoning_content")
                if reasoning:
                    logger.debug(f"[NVIDIA_CODER] Reasoning: {reasoning}")
                chunk_content = delta.get("content")
                if chunk_content:
                    content += chunk_content

            if not content or content.strip() == "":
                logger.warning(f"Empty content from NVIDIA Coder model")
                return "I received an empty response from the model."
            return content.strip()
    except Exception as e:
        logger.warning(f"NVIDIA Coder API error: {e}")
        return "I couldn't process the request with NVIDIA Coder model."
def extract_structured_code(markdown: str):
    """Extract structured code blocks from the coder model's Markdown output.

    Expects sections like:
    'File: path/to/file.py' followed by a fenced code block and then an
    explanation paragraph. Headings are matched case-insensitively ('File:'
    or 'file:') — previously the split/match regexes were case-sensitive
    while the startswith guard was not, so lowercase headings produced
    path 'unknown'.

    Returns list of {path, language, code, explanation}.
    """
    import re
    blocks = []
    if not markdown:
        return blocks
    # Split on 'File:' headings (case-insensitive) to locate file sections.
    parts = re.split(r"\n(?=File:\s*)", markdown, flags=re.IGNORECASE)
    for part in parts:
        part = part.strip()
        if not part.lower().startswith("file:"):
            # The first chunk may be prelude; skip it.
            continue
        # Extract the file path from the heading line.
        m_path = re.match(r"File:\s*(.+)", part, flags=re.IGNORECASE)
        file_path = m_path.group(1).strip() if m_path else "unknown"
        # Extract the fenced code block with its optional language tag.
        m_code = re.search(r"```([a-zA-Z0-9_+-]*)\n([\s\S]*?)\n```", part)
        language = (m_code.group(1) or '').strip() if m_code else ''
        code = m_code.group(2) if m_code else ''
        # Text after the code block is treated as the explanation.
        explanation = ''
        if m_code:
            _, end = m_code.span()
            explanation = part[end:].strip()
        blocks.append({
            "path": file_path,
            "language": language or detect_language_from_path(file_path),
            "code": code.strip(),
            "explanation": explanation
        })
    return blocks
def detect_language_from_path(path: str) -> str:
    """Map a file path's extension to a Markdown fence language tag.

    Returns '' when the path has no extension or the extension is unknown.
    """
    ext_to_lang = {
        'py': 'python',
        'js': 'javascript',
        'ts': 'typescript',
        'json': 'json',
        'md': 'markdown',
        'html': 'html',
        'css': 'css',
        'sh': 'bash',
        'yml': 'yaml',
        'yaml': 'yaml',
    }
    _, dot, suffix = path.rpartition('.')
    if not dot:
        return ''
    return ext_to_lang.get(suffix.lower(), '')
|