Deepanshu3012 committed on
Commit
fd625c0
·
1 Parent(s): fc2d789

Add complete Code Review NLP Assistant with Gradio UI, CodeBERT and CodeT5

Browse files
Files changed (3) hide show
  1. app.py +248 -0
  2. data/sample_code.py +151 -0
  3. requirements.txt +1 -1
app.py ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Code Review NLP Assistant — Gradio App
3
+ Run with: python app.py
4
+ """
5
+
6
+ import sys
7
+ import os
8
+ sys.path.insert(0, os.path.dirname(__file__))
9
+
10
+ import gradio as gr
11
+ import plotly.graph_objects as go
12
+
13
+ from models.code_analyzer import CodeReviewAnalyzer
14
+ from utils.helpers import (
15
+ extract_functions,
16
+ extract_classes,
17
+ score_to_grade,
18
+ score_color,
19
+ build_report,
20
+ )
21
+ from data.sample_code import SAMPLES
22
+
23
+ analyzer = CodeReviewAnalyzer(use_gpu=False)
24
+
25
+
26
def build_radar(doc_score, name_score, comp_score, overall):
    """Build the polar "quality radar" figure for one analysis result.

    The four scores are drawn as a single filled Scatterpolar trace on a
    radial axis ranging from 0 to 100.

    Parameters:
        doc_score: value plotted on the "Documentation" axis.
        name_score: value plotted on the "Naming" axis.
        comp_score: value plotted on the "Complexity" axis.
        overall: value plotted on the "Overall" axis.

    Returns:
        The configured plotly Figure.
    """
    axis_labels = ["Documentation", "Naming", "Complexity", "Overall"]
    axis_values = [doc_score, name_score, comp_score, overall]

    # The first point is repeated at the end of the trace (presumably so the
    # outline closes back onto its starting point).
    trace = go.Scatterpolar(
        r=axis_values + axis_values[:1],
        theta=axis_labels + axis_labels[:1],
        fill="toself",
        fillcolor="rgba(99,102,241,0.2)",
        line={"color": "#6366f1", "width": 2},
    )

    figure = go.Figure(trace)
    figure.update_layout(
        polar={"radialaxis": {"visible": True, "range": [0, 100]}},
        showlegend=False,
        margin={"l": 40, "r": 40, "t": 40, "b": 40},
        height=320,
    )
    return figure
41
+
42
+
43
def build_bars(doc_score, name_score, comp_score):
    """Build the bar chart that breaks the result down by sub-score.

    Each bar is coloured via ``score_color`` and labelled with the string
    form of its score, placed outside the bar.

    Parameters:
        doc_score: value for the "Documentation" bar.
        name_score: value for the "Naming" bar.
        comp_score: value for the "Complexity" bar.

    Returns:
        The configured plotly Figure.
    """
    labels = ["Documentation", "Naming", "Complexity"]
    scores = [doc_score, name_score, comp_score]

    bar_trace = go.Bar(
        x=labels,
        y=scores,
        marker_color=[score_color(value) for value in scores],
        text=[str(value) for value in scores],
        textposition="outside",
    )

    chart = go.Figure(bar_trace)
    chart.update_layout(
        # Axis runs past 100 (presumably to leave headroom for the outside
        # text labels on tall bars).
        yaxis={"range": [0, 115]},
        margin={"l": 20, "r": 20, "t": 20, "b": 20},
        height=300,
    )
    return chart
61
+
62
+
63
def _score_tile(title, value):
    """Render one labelled sub-score for the overall score card."""
    return (
        "<div>"
        f'<div style="color:#94a3b8; font-size:0.75rem;">{title}</div>'
        f'<div style="color:{score_color(value)}; '
        'font-weight:600; font-size:1.1rem;">'
        f"{value}"
        "</div>"
        "</div>"
    )


def _render_score_card(result):
    """Render the HTML card showing the overall score, grade and sub-scores."""
    grade, label = score_to_grade(result.overall_score)
    color = score_color(result.overall_score)
    tiles = "\n".join(
        _score_tile(title, value)
        for title, value in (
            ("Docs", result.documentation_score),
            ("Naming", result.naming_score),
            ("Complexity", result.complexity_score),
        )
    )
    return f"""
<div style="text-align:center; padding:1.5rem;
            background:#0f172a; border-radius:16px;
            border:1px solid #1e293b; color:white;">
  <div style="font-size:0.8rem; color:#94a3b8;
              text-transform:uppercase; letter-spacing:0.1em;">
    Overall Score
  </div>
  <div style="font-size:3rem; font-weight:700; color:{color}; margin:0.3rem 0;">
    {result.overall_score}
  </div>
  <div style="font-size:1rem; color:#e2e8f0;">
    Grade {grade} — {label}
  </div>
  <div style="display:flex; justify-content:center;
              gap:2rem; margin-top:1rem; flex-wrap:wrap;">
    {tiles}
  </div>
</div>
"""


def _describe_functions(functions):
    """Format the extracted function records as Markdown, one per paragraph."""
    lines = []
    for fn in functions:
        doc = "✓ docstring" if fn["has_docstring"] else "✗ no docstring"
        args = ", ".join(fn["args"]) if fn["args"] else "none"
        lines.append(
            f"**def {fn['name']}()** — args: `{args}` | "
            f"returns: `{fn['returns'] or 'not annotated'}` | {doc}"
        )
    return "\n\n".join(lines) if lines else "No functions found."


def _describe_classes(classes):
    """Format the extracted class records as Markdown, one per paragraph."""
    lines = []
    for cls in classes:
        doc = "✓ docstring" if cls["has_docstring"] else "✗ no docstring"
        # Only the first five methods are listed to keep the line short.
        methods = ", ".join(cls["methods"][:5]) or "none"
        lines.append(f"**class {cls['name']}** — methods: `{methods}` | {doc}")
    return "\n\n".join(lines) if lines else "No classes found."


def analyze_code(code, sample_choice, generate_doc, get_embed):
    """Run the analyzer on ``code`` and build every value the UI displays.

    Parameters:
        code: source text from the editor. A missing value is treated as
            empty text.
        sample_choice: dropdown selection. When the editor is empty and this
            names an entry of ``SAMPLES``, that sample is analysed instead.
            ``"None"`` (or no selection) means no sample.
        generate_doc: forwarded to ``analyzer.analyze`` as ``generate_doc``.
        get_embed: forwarded to ``analyzer.analyze`` as ``get_embedding``.

    Returns:
        A 9-tuple matching the click handler's output components, in order:
        score card HTML, radar figure, bar figure, issues Markdown,
        suggestions Markdown, functions Markdown, classes Markdown,
        generated-docstring Markdown, full report. When there is nothing to
        analyse, the first item is a warning message, both figures are
        ``None`` and the remaining items are empty strings.
    """
    # Defensive: treat a missing editor value as empty so .strip() cannot fail.
    code = code or ""

    if not code.strip() and sample_choice and sample_choice != "None":
        # Unknown sample names fall through to the "nothing to analyse" path
        # instead of raising KeyError.
        code = SAMPLES.get(sample_choice, "")

    if not code.strip():
        # One value per output component (9); returning fewer makes Gradio
        # reject the handler result.
        return (
            "<p>⚠️ Please paste some code or pick a sample.</p>",
            None, None,
            "", "",
            "", "",
            "", "",
        )

    result = analyzer.analyze(
        code,
        generate_doc=generate_doc,
        get_embedding=get_embed,
    )

    score_html = _render_score_card(result)

    # Blank-line separators so each entry renders as its own paragraph;
    # Markdown collapses single newlines.
    if result.issues:
        issues_md = "\n\n".join(f"⚠️ {issue}" for issue in result.issues)
    else:
        issues_md = "✅ No critical issues found!"
    suggestions_md = "\n\n".join(f"💡 {tip}" for tip in result.suggestions)

    funcs_md = _describe_functions(extract_functions(code))
    classes_md = _describe_classes(extract_classes(code))

    if result.generated_docstring:
        docstring_md = f"```python\n{result.generated_docstring}\n```"
    else:
        docstring_md = "Docstring generation was disabled."

    report = build_report(result)

    radar = build_radar(
        result.documentation_score,
        result.naming_score,
        result.complexity_score,
        result.overall_score,
    )
    bars = build_bars(
        result.documentation_score,
        result.naming_score,
        result.complexity_score,
    )

    return (
        score_html, radar, bars,
        issues_md, suggestions_md,
        funcs_md, classes_md,
        docstring_md, report,
    )
172
+
173
+
174
def load_sample(sample_choice):
    """Return the sample source for the dropdown selection.

    Parameters:
        sample_choice: a key of ``SAMPLES``, or ``"None"`` for no sample.

    Returns:
        The sample's source text, or ``""`` when the selection is ``"None"``,
        empty/missing, or not a known sample name (instead of raising
        ``KeyError``).
    """
    if not sample_choice or sample_choice == "None":
        return ""
    return SAMPLES.get(sample_choice, "")
178
+
179
+
180
# UI layout: inputs in the left column, results in the (wider) right column.
with gr.Blocks(title="Code Review NLP Assistant") as demo:

    gr.Markdown("# 🔬 Code Review NLP Assistant")
    gr.Markdown(
        "Powered by **CodeBERT** · **CodeT5** · **AST Analysis** — 100% free & open source"
    )

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📝 Input")

            sample_dropdown = gr.Dropdown(
                choices=["None"] + list(SAMPLES.keys()),
                value="None",
                label="Load a sample",
            )

            code_input = gr.Code(
                language="python",
                label="Paste your Python code here",
                lines=20,
            )

            # Picking a sample replaces the editor contents with its source.
            sample_dropdown.change(
                fn=load_sample,
                inputs=sample_dropdown,
                outputs=code_input,
            )

            with gr.Row():
                generate_doc = gr.Checkbox(value=True, label="Generate docstring (CodeT5)")
                get_embed = gr.Checkbox(value=False, label="Get embedding (CodeBERT)")

            analyze_btn = gr.Button("🔍 Analyze Code", variant="primary", size="lg")

        with gr.Column(scale=2):
            gr.Markdown("### 📊 Results")

            score_html = gr.HTML()

            with gr.Row():
                radar_chart = gr.Plot(label="Quality Radar")
                bar_chart = gr.Plot(label="Score Breakdown")

            with gr.Tabs():
                with gr.Tab("⚠️ Issues"):
                    issues_out = gr.Markdown()
                with gr.Tab("💡 Suggestions"):
                    suggestions_out = gr.Markdown()
                with gr.Tab("🔧 Functions"):
                    funcs_out = gr.Markdown()
                with gr.Tab("🏛️ Classes"):
                    classes_out = gr.Markdown()
                with gr.Tab("🤖 Docstring"):
                    docstring_out = gr.Markdown()
                with gr.Tab("📄 Full Report"):
                    report_out = gr.Markdown()

    # analyze_code must return one value per component listed here (9),
    # in this order.
    analyze_btn.click(
        fn=analyze_code,
        inputs=[code_input, sample_dropdown, generate_doc, get_embed],
        outputs=[
            score_html, radar_chart, bar_chart,
            issues_out, suggestions_out,
            funcs_out, classes_out,
            docstring_out, report_out,
        ],
    )

if __name__ == "__main__":
    # NOTE(review): share=True asks Gradio for a public share link, which
    # exposes this app beyond localhost — confirm that is intended.
    demo.launch(share=True)
data/sample_code.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Sample code files used to demo the Code Review NLP Assistant.
3
+ These range from poor quality to good quality Python code.
4
+ """
5
+
6
+ # ── Sample 1: Poor quality code ───────────────────────────────────────────
7
+ POOR_CODE = '''\
8
+ import os
9
+ import sys
10
+
11
+ x = 10
12
+ y = 20
13
+ z = 30
14
+
15
+ def f(a,b,c):
16
+ result = 0
17
+ for i in range(0,1000):
18
+ if a > 10:
19
+ if b > 20:
20
+ if c > 30:
21
+ result = a*b*c+x+y+z
22
+ if result > 9999:
23
+ result = 9999
24
+ return result
25
+
26
+ def g(lst):
27
+ try:
28
+ for item in lst:
29
+ if item > 0:
30
+ print(item * 2 + 100)
31
+ except:
32
+ pass
33
+
34
+ class D:
35
+ def __init__(self,n,v):
36
+ self.n = n
37
+ self.v = v
38
+
39
+ def upd(self,new_v):
40
+ self.v = new_v
41
+ return self.v
42
+ '''
43
+
44
+ # ── Sample 2: Medium quality code ────────────────────────────────────────
45
+ MEDIUM_CODE = '''\
46
+ import requests
47
+
48
+ BASE_URL = "https://api.example.com"
49
+
50
+ def get_user(user_id):
51
+ # Fetch user from API
52
+ response = requests.get(f"{BASE_URL}/users/{user_id}")
53
+ if response.status_code == 200:
54
+ return response.json()
55
+ return None
56
+
57
+ def create_user(name, email, age):
58
+ data = {
59
+ "name": name,
60
+ "email": email,
61
+ "age": age
62
+ }
63
+ response = requests.post(f"{BASE_URL}/users", json=data)
64
+ if response.status_code == 201:
65
+ return response.json()["id"]
66
+ else:
67
+ print(f"Error creating user: {response.status_code}")
68
+ return None
69
+
70
+ def update_user(user_id, **kwargs):
71
+ response = requests.put(f"{BASE_URL}/users/{user_id}", json=kwargs)
72
+ return response.status_code == 200
73
+
74
+ def delete_user(user_id):
75
+ response = requests.delete(f"{BASE_URL}/users/{user_id}")
76
+ return response.status_code == 204
77
+ '''
78
+
79
+ # ── Sample 3: Good quality code ───────────────────────────────────────────
80
+ GOOD_CODE = '''\
81
+ """
82
+ User authentication module for the application.
83
+
84
+ Provides secure password hashing, token generation,
85
+ and session validation utilities.
86
+ """
87
+
88
+ import hashlib
89
+ import secrets
90
+ from typing import Optional
91
+
92
+
93
+ MAX_TOKEN_LENGTH: int = 64
94
+ HASH_ITERATIONS: int = 100_000
95
+
96
+
97
+ def hash_password(password: str, salt: Optional[str] = None) -> tuple[str, str]:
98
+ """
99
+ Hash a plaintext password using PBKDF2-HMAC-SHA256.
100
+
101
+ Parameters
102
+ ----------
103
+ password : str
104
+ The plaintext password to hash.
105
+ salt : str, optional
106
+ A hex salt string. Generated fresh if not provided.
107
+
108
+ Returns
109
+ -------
110
+ tuple[str, str]
111
+ A (hashed_password, salt) pair, both as hex strings.
112
+ """
113
+ if salt is None:
114
+ salt = secrets.token_hex(16)
115
+ hashed = hashlib.pbkdf2_hmac(
116
+ "sha256",
117
+ password.encode("utf-8"),
118
+ salt.encode("utf-8"),
119
+ HASH_ITERATIONS,
120
+ )
121
+ return hashed.hex(), salt
122
+
123
+
124
+ def generate_session_token(length: int = MAX_TOKEN_LENGTH) -> str:
125
+ """Generate a cryptographically secure session token."""
126
+ return secrets.token_urlsafe(length)
127
+
128
+
129
+ def validate_password_strength(password: str) -> tuple[bool, list[str]]:
130
+ """
131
+ Check whether a password meets the security policy.
132
+
133
+ Returns a (is_valid, list_of_failures) tuple.
134
+ """
135
+ failures: list[str] = []
136
+ if len(password) < 8:
137
+ failures.append("Password must be at least 8 characters")
138
+ if not any(c.isupper() for c in password):
139
+ failures.append("Must contain at least one uppercase letter")
140
+ if not any(c.isdigit() for c in password):
141
+ failures.append("Must contain at least one digit")
142
+ if not any(c in "!@#$%^&*()" for c in password):
143
+ failures.append("Must contain at least one special character")
144
+ return len(failures) == 0, failures
145
+ '''
146
+
147
+ SAMPLES = {
148
+ "Poor quality (grade F/D)": POOR_CODE,
149
+ "Medium quality (grade C)": MEDIUM_CODE,
150
+ "Good quality (grade A)": GOOD_CODE,
151
+ }
requirements.txt CHANGED
@@ -1,7 +1,7 @@
1
  transformers==4.40.0
2
  torch==2.2.2
3
  tokenizers==0.19.1
4
- streamlit==1.35.0
5
  tree-sitter==0.21.3
6
  tree-sitter-python==0.21.0
7
  radon==6.0.1
 
1
  transformers==4.40.0
2
  torch==2.2.2
3
  tokenizers==0.19.1
4
+ gradio==4.31.0
5
  tree-sitter==0.21.3
6
  tree-sitter-python==0.21.0
7
  radon==6.0.1