Spaces:
Sleeping
Sleeping
Update app/services/reviewer_service.py
Browse files- app/services/reviewer_service.py +81 -53
app/services/reviewer_service.py
CHANGED
|
@@ -1,12 +1,11 @@
|
|
| 1 |
import json
|
| 2 |
import logging
|
| 3 |
-
import re
|
| 4 |
-
|
| 5 |
from app.core.model_loader import llm_engine
|
| 6 |
|
| 7 |
logger = logging.getLogger(__name__)
|
| 8 |
|
| 9 |
-
|
| 10 |
class AIReviewerService:
|
| 11 |
def __init__(self):
|
| 12 |
pass
|
|
@@ -14,117 +13,146 @@ class AIReviewerService:
|
|
| 14 |
def review_batch_code(self, files: list) -> list:
|
| 15 |
results = []
|
| 16 |
|
| 17 |
-
#
|
| 18 |
-
# 5 files per batch means 1 request covers 5 files.
|
| 19 |
batch_size = 5
|
| 20 |
|
| 21 |
for i in range(0, len(files), batch_size):
|
| 22 |
batch = files[i : i + batch_size]
|
| 23 |
combined_code = ""
|
| 24 |
-
file_names = []
|
|
|
|
| 25 |
for f in batch:
|
| 26 |
-
# Minify code to save tokens
|
| 27 |
raw_content = f.content or ""
|
| 28 |
-
# Limit to 6k chars per file to
|
| 29 |
minified_content = self._minify_code(raw_content[:6000], f.fileName)
|
| 30 |
|
| 31 |
combined_code += f"\n--- FILE: {f.fileName} ---\n{minified_content}\n"
|
| 32 |
file_names.append(f.fileName)
|
| 33 |
|
|
|
|
| 34 |
prompt = f"""
|
| 35 |
-
Analyze {len(batch)} files
|
| 36 |
{combined_code}
|
| 37 |
|
| 38 |
-
Task:
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
"""
|
| 42 |
try:
|
| 43 |
-
# 8k output tokens
|
| 44 |
response_text = llm_engine.generate(prompt, max_tokens=8192)
|
| 45 |
batch_results = self._parse_json(response_text)
|
| 46 |
|
| 47 |
-
#
|
| 48 |
processed_map = {item.get('fileName'): item for item in batch_results if isinstance(item, dict)}
|
| 49 |
|
| 50 |
for fn in file_names:
|
| 51 |
if fn in processed_map:
|
| 52 |
res = processed_map[fn]
|
|
|
|
|
|
|
| 53 |
res.setdefault("vulnerabilities", [])
|
| 54 |
-
res.setdefault("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
results.append(res)
|
| 56 |
else:
|
| 57 |
-
# Fallback if AI
|
| 58 |
-
results.append({
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
except Exception as e:
|
| 61 |
-
logger.error(f"Batch error: {e}")
|
| 62 |
for fn in file_names:
|
| 63 |
-
results.append(
|
| 64 |
-
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
return results
|
| 68 |
|
| 69 |
def _minify_code(self, code: str, filename: str) -> str:
|
| 70 |
"""
|
| 71 |
-
|
| 72 |
-
Removes:
|
| 73 |
-
1. Empty lines
|
| 74 |
-
2. Full-line comments
|
| 75 |
-
3. Inline comments
|
| 76 |
-
4. Logging/Print statements
|
| 77 |
"""
|
| 78 |
lines = code.split('\n')
|
| 79 |
cleaned_lines = []
|
| 80 |
|
| 81 |
-
# Determine comment style
|
| 82 |
is_python = filename.endswith('.py')
|
| 83 |
-
is_js_style = filename.endswith(('.js', '.ts', '.jsx', '.tsx', '.java', '.
|
| 84 |
|
| 85 |
for line in lines:
|
| 86 |
stripped = line.strip()
|
| 87 |
|
| 88 |
-
|
| 89 |
-
if not stripped:
|
| 90 |
-
continue
|
| 91 |
|
| 92 |
-
#
|
| 93 |
if is_python and stripped.startswith('#'): continue
|
| 94 |
if is_js_style and stripped.startswith(('//', '/*', '*')): continue
|
| 95 |
|
| 96 |
-
#
|
| 97 |
-
|
| 98 |
-
if 'console.log' in stripped or 'print(' in stripped or 'logger.' in stripped or 'System.out.print' in stripped:
|
| 99 |
continue
|
| 100 |
|
| 101 |
-
#
|
| 102 |
-
|
| 103 |
-
if
|
| 104 |
-
line = line.split(' #', 1)[0]
|
| 105 |
-
|
| 106 |
-
if is_js_style and ' //' in line:
|
| 107 |
-
line = line.split(' //', 1)[0]
|
| 108 |
|
| 109 |
-
|
| 110 |
-
if not line.strip():
|
| 111 |
-
continue
|
| 112 |
|
| 113 |
cleaned_lines.append(line.rstrip())
|
| 114 |
|
| 115 |
return '\n'.join(cleaned_lines)
|
| 116 |
|
| 117 |
def _parse_json(self, text: str):
|
|
|
|
|
|
|
|
|
|
| 118 |
text = text.strip()
|
| 119 |
if not text:
|
| 120 |
return []
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
text = re.sub(r"
|
|
|
|
| 124 |
text = text.replace("```json", "").replace("```", "").strip()
|
|
|
|
| 125 |
try:
|
| 126 |
-
|
| 127 |
-
return data
|
| 128 |
except json.JSONDecodeError as e:
|
| 129 |
-
logger.warning(f"JSON
|
| 130 |
-
return []
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import json
|
| 2 |
import logging
|
| 3 |
+
import re
|
| 4 |
+
from typing import List, Dict
|
| 5 |
from app.core.model_loader import llm_engine
|
| 6 |
|
| 7 |
logger = logging.getLogger(__name__)
|
| 8 |
|
|
|
|
| 9 |
class AIReviewerService:
    """LLM-backed batch code reviewer.

    Minifies source files to save tokens, sends them to the LLM in batches,
    and normalizes the model's JSON reply so that every input file receives
    a complete result dict (fileName, vulnerabilities,
    improvement_suggestions, metrics) — even when the model skips a file or
    returns malformed/partial JSON.
    """

    def __init__(self):
        pass

    def review_batch_code(self, files: list) -> list:
        """Review a list of file objects and return one result dict per file.

        Each element of *files* is expected to expose ``.fileName`` and
        ``.content`` attributes (presumably an upload/DTO object — confirm
        against the caller).

        Returns:
            list[dict]: one dict per input file, in input order per batch,
            always containing the keys ``fileName``, ``vulnerabilities``,
            ``improvement_suggestions`` and ``metrics``.
        """
        results = []

        # Process in batches of 5 to stay within Gemini Free Tier limits (15 RPM)
        batch_size = 5

        for i in range(0, len(files), batch_size):
            batch = files[i : i + batch_size]
            combined_code = ""
            file_names = []

            for f in batch:
                # Minify code to save tokens
                raw_content = f.content or ""
                # Limit to 6k chars per file to stay within context window safely
                minified_content = self._minify_code(raw_content[:6000], f.fileName)

                combined_code += f"\n--- FILE: {f.fileName} ---\n{minified_content}\n"
                file_names.append(f.fileName)

            # Prompt explicitly requests suggestions and improvements so the
            # response schema matches what the client deserializes.
            prompt = f"""
Analyze the following {len(batch)} source code files for security vulnerabilities and code quality.
{combined_code}

Task:
1. Detect severe security/logic issues (Vulnerabilities).
2. If a vulnerability is found, provide a concise 'suggestion' on how to fix it.
3. If NO vulnerabilities are found in a file, provide a list of 'improvement_suggestions' (clean code, performance, or architecture tips).
4. Provide metrics for complexity and maintainability (scale 1-10).

Output a JSON array (exactly 1 object per file):
[
  {{
    "fileName": "exact/path/from/header",
    "vulnerabilities": [
      {{
        "type": "SQLi/Logic/etc",
        "line": 10,
        "description": "Short explanation",
        "suggestion": "Specific code fix"
      }}
    ],
    "improvement_suggestions": ["Tip 1", "Tip 2"],
    "metrics": {{"complexity": 3, "maintainability": 8}}
  }}
]
"""
            # Stage this batch's results locally and commit them only after
            # the whole batch normalizes cleanly. Previously, an exception
            # raised mid-loop (e.g. a null "vulnerabilities" value) landed in
            # the broad except below, which appended fallback entries for ALL
            # file_names — duplicating files that were already appended.
            batch_out = []
            try:
                # 8k output tokens for the batch analysis
                response_text = llm_engine.generate(prompt, max_tokens=8192)
                batch_results = self._parse_json(response_text)

                # Map results by fileName for easy lookup
                processed_map = {item.get('fileName'): item for item in batch_results if isinstance(item, dict)}

                for fn in file_names:
                    if fn in processed_map:
                        res = processed_map[fn]

                        # Ensure all required keys exist to prevent Android serialization errors
                        res.setdefault("vulnerabilities", [])
                        res.setdefault("improvement_suggestions", [])
                        res.setdefault("metrics", {"complexity": 5, "maintainability": 5})

                        # setdefault only covers MISSING keys; the model may
                        # also return null / a non-list value — normalize it
                        # so the iteration below cannot raise.
                        if not isinstance(res["vulnerabilities"], list):
                            res["vulnerabilities"] = []

                        # Ensure every vulnerability has a suggestion field
                        for vuln in res["vulnerabilities"]:
                            if isinstance(vuln, dict) and "suggestion" not in vuln:
                                vuln["suggestion"] = "Review the implementation logic for improved safety."

                        batch_out.append(res)
                    else:
                        # Fallback if the AI skipped a file in its response
                        batch_out.append({
                            "fileName": fn,
                            "vulnerabilities": [],
                            "improvement_suggestions": ["No immediate improvements identified."],
                            "metrics": {"complexity": 1, "maintainability": 10}
                        })

                results.extend(batch_out)

            except Exception as e:
                # Broad catch is deliberate: one bad batch must not abort the
                # whole review. Every file in the batch gets a zeroed result.
                logger.error(f"Batch processing error: {e}")
                for fn in file_names:
                    results.append({
                        "fileName": fn,
                        "vulnerabilities": [],
                        "improvement_suggestions": [],
                        "metrics": {"complexity": 0, "maintainability": 0}
                    })

        return results

    def _minify_code(self, code: str, filename: str) -> str:
        """Remove comments, empty lines, and logging calls to save tokens.

        The stripping is heuristic (e.g. ``' #'`` inside a string literal
        would also be cut) — acceptable here because the output is only fed
        to the LLM, never executed.
        """
        lines = code.split('\n')
        cleaned_lines = []

        # Choose comment syntax from the file extension.
        is_python = filename.endswith('.py')
        is_js_style = filename.endswith(('.js', '.ts', '.jsx', '.tsx', '.java', '.kt', '.c', '.cpp'))

        for line in lines:
            stripped = line.strip()

            if not stripped: continue

            # Skip full-line comments
            if is_python and stripped.startswith('#'): continue
            if is_js_style and stripped.startswith(('//', '/*', '*')): continue

            # Skip common logging / debug output
            if any(log in stripped for log in ['console.log', 'print(', 'logger.', 'Log.d', 'Log.e']):
                continue

            # Strip inline comments
            if is_python and ' #' in line: line = line.split(' #', 1)[0]
            if is_js_style and ' //' in line: line = line.split(' //', 1)[0]

            # Line may have become blank after inline-comment stripping.
            if not line.strip(): continue

            cleaned_lines.append(line.rstrip())

        return '\n'.join(cleaned_lines)

    def _parse_json(self, text: str):
        """Clean and parse the LLM response into a list of result dicts.

        Strips markdown code fences, tolerates prose around the JSON, and
        wraps a bare JSON object into a one-element list so callers can
        always iterate. Returns [] for empty or unparseable input.
        """
        text = text.strip()
        if not text:
            return []

        # Strip markdown fences BEFORE any structural trimming. The old
        # order ran the array-trim regexes first, which mangled a bare
        # object response (any '[' inside it truncated everything before).
        text = text.replace("```json", "").replace("```", "").strip()

        def _loads(candidate: str):
            data = json.loads(candidate)
            # Callers expect a list of per-file dicts; wrap a single object.
            return data if isinstance(data, list) else [data]

        try:
            return _loads(text)
        except json.JSONDecodeError:
            pass

        # Fallback: trim any prose surrounding the outermost JSON array.
        trimmed = re.sub(r"^[^\[]*\[", "[", text)
        trimmed = re.sub(r"\][^\]]*$", "]", trimmed)
        try:
            return _loads(trimmed)
        except json.JSONDecodeError as e:
            logger.warning(f"JSON Decode Error: {e}")
            return []
|
| 156 |
+
|
| 157 |
+
# Module-level singleton: one shared instance is sufficient since the
# service holds no per-request state (its __init__ is a no-op).
service = AIReviewerService()
|