CodeCommunity committed on
Commit
ed29fcc
·
verified ·
1 Parent(s): 6ed0915

Create app/services/reviewer_service.py

Browse files
Files changed (1) hide show
  1. app/services/reviewer_service.py +130 -0
app/services/reviewer_service.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ import re # For better cleanup
4
+
5
+ from app.core.model_loader import llm_engine
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
class AIReviewerService:
    """Reviews source files for severe security/logic issues via batched LLM calls.

    Files are minified and concatenated so several files share one request
    (the backing API is rate limited — presumably a free tier at 15 RPM per
    the original comments — so fewer requests is cheaper).
    """

    # One request covers this many files (sized for the rate-limited tier).
    _BATCH_SIZE = 5
    # Per-file character cap so a full batch of minified files fits in context.
    _MAX_CHARS_PER_FILE = 6000

    def __init__(self):
        # Stateless service; the shared `llm_engine` is module-level.
        pass

    def review_batch_code(self, files: list) -> list:
        """Review `files` (objects with `.fileName` and `.content`) in batches.

        Returns one result dict per input file, in input order:
        ``{"fileName": ..., "vulnerabilities": [...], "metrics": {...}}``.
        Files the model omitted from its answer — or whole batches that
        error out — get an empty fallback entry instead of being dropped.
        """
        results: list = []

        for i in range(0, len(files), self._BATCH_SIZE):
            batch = files[i : i + self._BATCH_SIZE]
            file_names = []  # Input order preserved for fallback entries.
            sections = []    # Per-file prompt chunks; joined once below.
            for f in batch:
                raw_content = f.content or ""  # Tolerate files with no content.
                # Minify (and cap length) to save tokens.
                minified_content = self._minify_code(
                    raw_content[: self._MAX_CHARS_PER_FILE], f.fileName
                )
                sections.append(f"\n--- FILE: {f.fileName} ---\n{minified_content}\n")
                file_names.append(f.fileName)
            # join() avoids quadratic += concatenation across the batch.
            combined_code = "".join(sections)

            prompt = f"""
        Analyze {len(batch)} files:
        {combined_code}

        Task: Detect severe security/logic issues.
        Output JSON array (1 obj/file):
        [{{"fileName": "path", "vulnerabilities": [{{"type": "SQLi", "line": 10, "description": "text"}}], "metrics": {{"complexity": 1-10, "maintainability": 1-10}}}}]
        """
            try:
                # 8k output tokens is plenty for a 5-file batch.
                response_text = llm_engine.generate(prompt, max_tokens=8192)
                batch_results = self._parse_json(response_text)

                # Index the model's answers by file name; drop junk entries.
                processed_map = {
                    item.get("fileName"): item
                    for item in batch_results
                    if isinstance(item, dict)
                }

                for fn in file_names:
                    res = processed_map.get(fn)
                    if res is not None:
                        # Guarantee the keys downstream consumers expect.
                        res.setdefault("vulnerabilities", [])
                        res.setdefault("metrics", {})
                        results.append(res)
                    else:
                        # Fallback if the AI missed a file in the JSON list.
                        results.append(
                            {"fileName": fn, "vulnerabilities": [], "metrics": {}}
                        )

            except Exception as e:
                # Best-effort boundary: a failed batch yields empty results,
                # never a crash for the caller. Lazy %s args keep logging cheap.
                logger.error("Batch error: %s", e)
                for fn in file_names:
                    results.append(
                        {"fileName": fn, "vulnerabilities": [], "metrics": {}}
                    )

        return results

    def _minify_code(self, code: str, filename: str) -> str:
        """Aggressively minify `code` to reduce token usage.

        Removes:
          1. Empty lines
          2. Full-line comments
          3. Logging/print statements (high token cost, low security relevance)
          4. Inline comments

        NOTE(review): these are substring heuristics, not a parser — a ' #'
        inside a Python string is treated as a comment, and C/C++ pointer
        lines like '*p = 1;' are dropped because they resemble block-comment
        continuations. Acceptable for token trimming, not for execution.
        """
        # Pick comment syntax from the file extension.
        is_python = filename.endswith(".py")
        is_js_style = filename.endswith(
            (".js", ".ts", ".jsx", ".tsx", ".java", ".c", ".cpp", ".cs", ".go", ".rs", ".php")
        )

        cleaned_lines = []
        for line in code.split("\n"):
            stripped = line.strip()

            # 1. Skip empty lines.
            if not stripped:
                continue

            # 2. Skip full-line comments.
            if is_python and stripped.startswith("#"):
                continue
            if is_js_style and stripped.startswith(("//", "/*", "*")):
                continue

            # 3. Skip common logging/printing patterns.
            if (
                "console.log" in stripped
                or "print(" in stripped
                or "logger." in stripped
                or "System.out.print" in stripped
            ):
                continue

            # 4. Strip inline comments. Requiring a leading space avoids
            # splitting URLs such as http://example.com.
            if is_python and " #" in line:
                line = line.split(" #", 1)[0]
            if is_js_style and " //" in line:
                line = line.split(" //", 1)[0]

            # Comment stripping may have emptied the line.
            if not line.strip():
                continue

            cleaned_lines.append(line.rstrip())

        return "\n".join(cleaned_lines)

    def _parse_json(self, text: str) -> list:
        """Extract a JSON array from raw LLM output.

        Strips Markdown code fences and any prose around the outermost
        ``[...]`` span, then parses. Always returns a list: a bare object is
        wrapped in a single-element list (models sometimes ignore the
        "array" instruction), and anything unparseable yields ``[]``.
        """
        text = text.strip()
        if not text:
            return []

        # Remove Markdown code fences before locating the array.
        text = text.replace("```json", "").replace("```", "").strip()

        # Trim prose before the first '[' and after the last ']' — same
        # span the previous regex cleanup targeted, but clearer.
        start = text.find("[")
        end = text.rfind("]")
        if start != -1 and end != -1 and start < end:
            text = text[start : end + 1]

        try:
            data = json.loads(text)
        except json.JSONDecodeError as e:
            logger.warning("JSON error: %s | Raw start: %.200s...", e, text)
            return []

        # Fix: a single object where an array was requested previously
        # leaked through as a dict and made every file fall back empty.
        if isinstance(data, dict):
            return [data]
        return data if isinstance(data, list) else []