ayshajavd committed on
Commit
7336b37
·
verified ·
1 Parent(s): fa93666

v2: Deploy updated app with per-class thresholds, temperature calibration, CWE-aware fix generation

Browse files
Files changed (1) hide show
  1. app.py +83 -155
app.py CHANGED
@@ -1,9 +1,14 @@
1
  """
2
- Code Security Risk Analyzer - Gradio UI + REST API
3
- Analyzes code for OWASP Top 10, CWE vulnerabilities.
4
- Outputs structured security report with vulnerability details, severity, and fixes.
5
-
6
- REST API: Use gradio_client or POST to /api/predict
 
 
 
 
 
7
  """
8
  import json
9
  import re
@@ -15,6 +20,8 @@ from transformers import (
15
  AutoModelForSequenceClassification,
16
  T5ForConditionalGeneration,
17
  )
 
 
18
 
19
  # ============================================================
20
  # Label Mappings
@@ -151,6 +158,9 @@ EXPLANATIONS = {
151
  CLASSIFIER_ID = "ayshajavd/graphcodebert-vuln-classifier"
152
  FIXER_ID = "ayshajavd/codet5p-vuln-fixer"
153
 
 
 
 
154
  print("Loading classifier...")
155
  try:
156
  cls_tokenizer = AutoTokenizer.from_pretrained(CLASSIFIER_ID)
@@ -158,13 +168,23 @@ try:
158
  cls_model.eval()
159
  CLASSIFIER_LOADED = True
160
  print("Classifier loaded successfully")
 
 
 
 
 
 
 
 
 
 
 
 
161
  except Exception as e:
162
  print(f"Classifier not available: {e}")
163
  cls_tokenizer = AutoTokenizer.from_pretrained("huggingface/CodeBERTa-small-v1")
164
  cls_model = AutoModelForSequenceClassification.from_pretrained(
165
- "huggingface/CodeBERTa-small-v1",
166
- num_labels=31,
167
- problem_type="multi_label_classification",
168
  )
169
  cls_model.eval()
170
  CLASSIFIER_LOADED = False
@@ -200,14 +220,27 @@ def detect_language(code: str) -> str:
200
  def classify_code(code):
201
  inputs = cls_tokenizer(code, return_tensors="pt", max_length=512, truncation=True, padding=True)
202
  with torch.no_grad():
203
- probs = torch.sigmoid(cls_model(**inputs).logits).squeeze().numpy()
204
- detected = [(cwe, float(p)) for i, (cwe, p) in enumerate(zip(TARGET_CWES, probs)) if cwe != "safe" and p > 0.3]
 
 
 
 
 
 
 
 
205
  detected.sort(key=lambda x: x[1], reverse=True)
206
  return detected, float(probs[0]), {cwe: float(p) for cwe, p in zip(TARGET_CWES, probs)}
207
 
208
 
209
- def generate_fix(code, language):
210
- input_ids = fix_tokenizer(f"fix {language.lower()}: " + code, return_tensors="pt", max_length=512, truncation=True).input_ids
 
 
 
 
 
211
  with torch.no_grad():
212
  out = fix_model.generate(input_ids, max_length=512, num_beams=5, early_stopping=True, no_repeat_ngram_size=3)
213
  return fix_tokenizer.decode(out[0], skip_special_tokens=True)
@@ -216,7 +249,6 @@ def generate_fix(code, language):
216
  def build_json_report(code):
217
  language = detect_language(code)
218
  detected, safe_prob, all_probs = classify_code(code)
219
-
220
  if not detected:
221
  overall_risk = max(0, int(100 - 100 * safe_prob))
222
  risk_level = "Low"
@@ -225,20 +257,19 @@ def build_json_report(code):
225
  avg_conf = sum(p for _, p in detected) / len(detected)
226
  overall_risk = min(100, int(max_sev * avg_conf * 1.2))
227
  risk_level = "Critical" if overall_risk >= 80 else "High" if overall_risk >= 60 else "Medium" if overall_risk >= 40 else "Low"
228
-
229
  vulns = []
230
  for cwe, conf in detected:
231
  sev, score = SEVERITY_MAP.get(cwe, ("Medium", 50))
 
232
  vulns.append({
233
  "cwe_id": cwe, "name": CWE_NAMES.get(cwe, cwe),
234
  "owasp_category": CWE_TO_OWASP.get(cwe, "N/A"),
235
  "severity": sev, "severity_score": score,
236
  "detection_confidence": round(conf, 4),
 
237
  "exploit_likelihood": min(100, int(conf * score)),
238
  "explanation": EXPLANATIONS.get(cwe, "Security risk detected.").replace("**", ""),
239
  })
240
-
241
- # Attack chain
242
  chain = None
243
  if len(detected) > 1:
244
  steps = []
@@ -252,16 +283,20 @@ def build_json_report(code):
252
  if cats & {"CWE-119","CWE-416","CWE-787","CWE-502"}:
253
  steps.append({"step": len(steps)+1, "phase": "Code Execution", "description": "Exploit memory corruption"})
254
  if steps: chain = steps
255
-
256
  fix = None
257
  try:
258
- f = generate_fix(code, language)
 
259
  if f and f.strip(): fix = f
260
  except: pass
261
-
262
  return {
263
  "language": language,
264
- "model_status": {"classifier": "trained" if CLASSIFIER_LOADED else "base_model", "fix_generator": "trained" if FIXER_LOADED else "base_model"},
 
 
 
 
 
265
  "overall_risk_score": overall_risk, "risk_level": risk_level,
266
  "safe_probability": round(safe_prob, 4), "num_vulnerabilities": len(vulns),
267
  "vulnerabilities": vulns, "attack_chain": chain, "suggested_fix": fix,
@@ -271,24 +306,25 @@ def build_json_report(code):
271
 
272
 
273
  def analyze_code(code):
274
- if not code or not code.strip(): return "⚠️ Please paste some code to analyze."
275
  data = build_json_report(code)
276
-
277
- r = ["# 🔒 Code Security Analysis Report\n"]
278
  r.append(f"**Language:** {data['language']}")
279
- r.append(f"**Classifier:** {'✅ Trained' if data['model_status']['classifier']=='trained' else '⚠️ Base Model (demo)'}")
280
- r.append(f"**Fix Generator:** {'✅ Trained' if data['model_status']['fix_generator']=='trained' else '⚠️ Base Model'}\n")
281
-
 
 
 
 
282
  if data['num_vulnerabilities'] == 0:
283
- r.append("## No Vulnerabilities Detected")
284
  r.append(f"**Risk Score:** {data['overall_risk_score']}/100 | **Safe Confidence:** {data['safe_probability']:.1%}\n")
285
  r.append("Code appears safe. Always supplement with manual review and SAST tools.")
286
  return "\n".join(r)
287
-
288
  emoji = {"Critical":"🔴","High":"🟠","Medium":"🟡","Low":"🟢"}.get(data['risk_level'],"⚪")
289
  r.append(f"## {emoji} {data['num_vulnerabilities']} Vulnerability(ies) Detected\n")
290
  r.append(f"**Risk Score:** {data['overall_risk_score']}/100 ({data['risk_level']}) | **Safe Probability:** {data['safe_probability']:.1%}\n---\n")
291
-
292
  for i, v in enumerate(data['vulnerabilities'], 1):
293
  se = {"Critical":"🔴","High":"🟠","Medium":"🟡","Low":"🟢"}.get(v['severity'],"⚪")
294
  r.append(f"### {i}. {se} {v['name']}")
@@ -296,22 +332,20 @@ def analyze_code(code):
296
  r.append(f"| **CWE ID** | {v['cwe_id']} |")
297
  r.append(f"| **OWASP** | {v['owasp_category']} |")
298
  r.append(f"| **Severity** | {v['severity']} ({v['severity_score']}/100) |")
299
- r.append(f"| **Confidence** | {v['detection_confidence']:.1%} |")
 
300
  r.append(f"| **Exploit Likelihood** | {v['exploit_likelihood']}% |")
301
  r.append(f"\n**Why Dangerous:** {v['explanation']}\n")
302
-
303
  if data['attack_chain']:
304
- r.append("---\n## ⛓️ Attack Chain\n")
305
  for s in data['attack_chain']:
306
  r.append(f"{s['step']}. **{s['phase']}** — {s['description']}")
307
-
308
- r.append("\n---\n## 🔧 Suggested Fix\n")
309
  if data['suggested_fix']:
310
  r.append(f"```{data['language'].lower()}\n{data['suggested_fix']}\n```")
311
  else:
312
  r.append("*Fix generation unavailable. Please review manually.*")
313
-
314
- r.append("\n---\n*AI-generated report. Verify with manual review and SAST tools.*")
315
  return "\n".join(r)
316
 
317
 
@@ -320,97 +354,28 @@ def get_json_report(code):
320
  return build_json_report(code)
321
 
322
 
323
- # ============================================================
324
- # Example Snippets
325
- # ============================================================
326
  EXAMPLES = [
327
- ["""import sqlite3
328
-
329
- def get_user(username):
330
- conn = sqlite3.connect('users.db')
331
- query = f"SELECT * FROM users WHERE username = '{username}'"
332
- return conn.execute(query).fetchone()
333
-
334
- def login(request):
335
- user = get_user(request.form['username'])
336
- if user and user[2] == request.form['password']:
337
- return "Login successful"
338
- return "Login failed"
339
- """],
340
- ["""#include <stdio.h>
341
- #include <string.h>
342
-
343
- void process_input(char *user_input) {
344
- char buffer[64];
345
- strcpy(buffer, user_input);
346
- printf("Processed: %s\\n", buffer);
347
- }
348
-
349
- int main(int argc, char *argv[]) {
350
- if (argc > 1) process_input(argv[1]);
351
- return 0;
352
- }
353
- """],
354
- ["""const express = require('express');
355
- const app = express();
356
-
357
- app.get('/search', (req, res) => {
358
- const query = req.query.q;
359
- res.send(`<h1>Results for: ${query}</h1>`);
360
- });
361
-
362
- app.get('/profile/:id', (req, res) => {
363
- db.query('SELECT * FROM users WHERE id = ' + req.params.id, (err, user) => {
364
- res.send(`<h2>${user.name}</h2>`);
365
- });
366
- });
367
- """],
368
- ["""import requests, hashlib
369
-
370
- API_KEY = "sk-proj-abc123def456"
371
- DB_PASSWORD = "admin123"
372
-
373
- def connect_to_api():
374
- return requests.get("https://api.example.com/data",
375
- headers={"Authorization": f"Bearer {API_KEY}"}).json()
376
-
377
- def hash_password(password):
378
- return hashlib.md5(password.encode()).hexdigest()
379
- """],
380
- ["""import sqlite3
381
- from hashlib import sha256
382
- import hmac, secrets
383
-
384
- def get_user(username):
385
- conn = sqlite3.connect('users.db')
386
- conn.execute("SELECT * FROM users WHERE username = ?", (username,))
387
- return conn.fetchone()
388
-
389
- def hash_password(password, salt=None):
390
- salt = salt or secrets.token_hex(16)
391
- return f"{salt}:{sha256((salt+password).encode()).hexdigest()}"
392
-
393
- def verify_password(password, stored):
394
- salt, expected = stored.split(':')
395
- return hmac.compare_digest(sha256((salt+password).encode()).hexdigest(), expected)
396
- """],
397
  ]
398
 
399
- # ============================================================
400
- # Gradio UI
401
- # ============================================================
402
  with gr.Blocks(
403
- title="Code Security Risk Analyzer",
404
  theme=gr.themes.Soft(),
405
  css=".gradio-container { max-width: 1200px; margin: auto; }",
406
  ) as demo:
407
  gr.Markdown("""
408
- # 🔒 AI-Powered Code Security Risk Analyzer
409
- ### Detect OWASP Top 10 & CWE vulnerabilities with secure fix suggestions
410
 
411
  Paste code in Python, JavaScript, Java, C, C++, PHP, or Go.
412
 
413
- **Models:** [GraphCodeBERT](https://huggingface.co/ayshajavd/graphcodebert-vuln-classifier) (detection) + [CodeT5+](https://huggingface.co/ayshajavd/codet5p-vuln-fixer) (fixes) | **Dataset:** [175K samples](https://huggingface.co/datasets/ayshajavd/code-security-vulnerability-dataset)
 
 
414
  """)
415
 
416
  with gr.Row():
@@ -431,7 +396,6 @@ with gr.Blocks(
431
  analyze_btn.click(fn=analyze_code, inputs=[code_input], outputs=[report_output], api_name="analyze")
432
  json_btn.click(fn=show_json, inputs=[code_input], outputs=[json_output])
433
 
434
- # Hidden API-only endpoint for raw JSON reports
435
  with gr.Row(visible=False):
436
  api_json_btn = gr.Button("get_json", visible=False)
437
  api_json_btn.click(fn=get_json_report, inputs=[code_input], outputs=[json_output], api_name="get_json_report")
@@ -441,51 +405,15 @@ with gr.Blocks(
441
  ### Python Client
442
  ```python
443
  from gradio_client import Client
444
-
445
  client = Client("ayshajavd/code-security-analyzer")
446
-
447
- # Get markdown report
448
  report = client.predict(code="your code here", api_name="/analyze")
449
-
450
- # Get structured JSON report
451
  json_report = client.predict(code="your code here", api_name="/get_json_report")
452
  ```
453
 
454
  ### cURL
455
  ```bash
456
- # Markdown report
457
- curl -X POST https://ayshajavd-code-security-analyzer.hf.space/call/analyze \
458
- -H "Content-Type: application/json" \
459
- -d '{"data": ["your code here"]}'
460
-
461
- # JSON report
462
- curl -X POST https://ayshajavd-code-security-analyzer.hf.space/call/get_json_report \
463
- -H "Content-Type: application/json" \
464
- -d '{"data": ["your code here"]}'
465
- ```
466
-
467
- ### JSON Response Schema
468
- ```json
469
- {
470
- "language": "Python",
471
- "overall_risk_score": 85,
472
- "risk_level": "Critical",
473
- "safe_probability": 0.12,
474
- "num_vulnerabilities": 2,
475
- "vulnerabilities": [{
476
- "cwe_id": "CWE-89",
477
- "name": "SQL Injection",
478
- "owasp_category": "A03:2021 - Injection",
479
- "severity": "Critical",
480
- "severity_score": 95,
481
- "detection_confidence": 0.92,
482
- "exploit_likelihood": 87,
483
- "explanation": "..."
484
- }],
485
- "attack_chain": [{"step": 1, "phase": "Initial Access", "description": "..."}],
486
- "suggested_fix": "...",
487
- "timestamp": "2025-01-01T00:00:00Z"
488
- }
489
  ```
490
  """)
491
 
 
1
  """
2
+ Code Security Risk Analyzer v2 - Gradio UI + REST API
3
+ =====================================================
4
+ IMPROVEMENTS OVER v1:
5
+ - Per-class threshold optimization (not global 0.3)
6
+ - Temperature scaling calibration (meaningful probabilities)
7
+ - Uses label_config.json for thresholds + calibration
8
+ - Better vulnerability detection across rare CWEs
9
+
10
+ Run AFTER notebooks 1-4 to use the improved models.
11
+ Upload this to: https://huggingface.co/spaces/ayshajavd/code-security-analyzer
12
  """
13
  import json
14
  import re
 
20
  AutoModelForSequenceClassification,
21
  T5ForConditionalGeneration,
22
  )
23
+ from huggingface_hub import hf_hub_download
24
+ import numpy as np
25
 
26
  # ============================================================
27
  # Label Mappings
 
158
  CLASSIFIER_ID = "ayshajavd/graphcodebert-vuln-classifier"
159
  FIXER_ID = "ayshajavd/codet5p-vuln-fixer"
160
 
161
+ THRESHOLDS = {cwe: 0.3 for cwe in TARGET_CWES}
162
+ TEMPERATURE = 1.0
163
+
164
  print("Loading classifier...")
165
  try:
166
  cls_tokenizer = AutoTokenizer.from_pretrained(CLASSIFIER_ID)
 
168
  cls_model.eval()
169
  CLASSIFIER_LOADED = True
170
  print("Classifier loaded successfully")
171
+ try:
172
+ config_path = hf_hub_download(CLASSIFIER_ID, "label_config.json")
173
+ with open(config_path) as f:
174
+ label_config = json.load(f)
175
+ if "optimized_thresholds" in label_config:
176
+ THRESHOLDS = label_config["optimized_thresholds"]
177
+ print(f"Per-class thresholds loaded ({len(THRESHOLDS)} classes)")
178
+ if "temperature" in label_config:
179
+ TEMPERATURE = label_config["temperature"]
180
+ print(f"Temperature calibration loaded (T={TEMPERATURE:.4f})")
181
+ except Exception as e:
182
+ print(f"Could not load label_config: {e}. Using defaults.")
183
  except Exception as e:
184
  print(f"Classifier not available: {e}")
185
  cls_tokenizer = AutoTokenizer.from_pretrained("huggingface/CodeBERTa-small-v1")
186
  cls_model = AutoModelForSequenceClassification.from_pretrained(
187
+ "huggingface/CodeBERTa-small-v1", num_labels=31, problem_type="multi_label_classification",
 
 
188
  )
189
  cls_model.eval()
190
  CLASSIFIER_LOADED = False
 
220
  def classify_code(code):
221
  inputs = cls_tokenizer(code, return_tensors="pt", max_length=512, truncation=True, padding=True)
222
  with torch.no_grad():
223
+ logits = cls_model(**inputs).logits.squeeze()
224
+ calibrated_logits = logits / TEMPERATURE
225
+ probs = torch.sigmoid(calibrated_logits).numpy()
226
+ detected = []
227
+ for i, (cwe, p) in enumerate(zip(TARGET_CWES, probs)):
228
+ if cwe == "safe":
229
+ continue
230
+ threshold = THRESHOLDS.get(cwe, 0.3)
231
+ if p > threshold:
232
+ detected.append((cwe, float(p)))
233
  detected.sort(key=lambda x: x[1], reverse=True)
234
  return detected, float(probs[0]), {cwe: float(p) for cwe, p in zip(TARGET_CWES, probs)}
235
 
236
 
237
+ def generate_fix(code, language, cwe_id=None):
238
+ if cwe_id:
239
+ cwe_name = CWE_NAMES.get(cwe_id, cwe_id)
240
+ prefix = f"fix {cwe_name} vulnerability in {language.lower()}: "
241
+ else:
242
+ prefix = f"fix {language.lower()}: "
243
+ input_ids = fix_tokenizer(prefix + code, return_tensors="pt", max_length=512, truncation=True).input_ids
244
  with torch.no_grad():
245
  out = fix_model.generate(input_ids, max_length=512, num_beams=5, early_stopping=True, no_repeat_ngram_size=3)
246
  return fix_tokenizer.decode(out[0], skip_special_tokens=True)
 
249
  def build_json_report(code):
250
  language = detect_language(code)
251
  detected, safe_prob, all_probs = classify_code(code)
 
252
  if not detected:
253
  overall_risk = max(0, int(100 - 100 * safe_prob))
254
  risk_level = "Low"
 
257
  avg_conf = sum(p for _, p in detected) / len(detected)
258
  overall_risk = min(100, int(max_sev * avg_conf * 1.2))
259
  risk_level = "Critical" if overall_risk >= 80 else "High" if overall_risk >= 60 else "Medium" if overall_risk >= 40 else "Low"
 
260
  vulns = []
261
  for cwe, conf in detected:
262
  sev, score = SEVERITY_MAP.get(cwe, ("Medium", 50))
263
+ threshold_used = THRESHOLDS.get(cwe, 0.3)
264
  vulns.append({
265
  "cwe_id": cwe, "name": CWE_NAMES.get(cwe, cwe),
266
  "owasp_category": CWE_TO_OWASP.get(cwe, "N/A"),
267
  "severity": sev, "severity_score": score,
268
  "detection_confidence": round(conf, 4),
269
+ "threshold_used": round(threshold_used, 3),
270
  "exploit_likelihood": min(100, int(conf * score)),
271
  "explanation": EXPLANATIONS.get(cwe, "Security risk detected.").replace("**", ""),
272
  })
 
 
273
  chain = None
274
  if len(detected) > 1:
275
  steps = []
 
283
  if cats & {"CWE-119","CWE-416","CWE-787","CWE-502"}:
284
  steps.append({"step": len(steps)+1, "phase": "Code Execution", "description": "Exploit memory corruption"})
285
  if steps: chain = steps
 
286
  fix = None
287
  try:
288
+ top_cwe = detected[0][0] if detected else None
289
+ f = generate_fix(code, language, top_cwe)
290
  if f and f.strip(): fix = f
291
  except: pass
 
292
  return {
293
  "language": language,
294
+ "model_status": {
295
+ "classifier": "trained_v2" if CLASSIFIER_LOADED else "base_model",
296
+ "fix_generator": "trained_v2" if FIXER_LOADED else "base_model",
297
+ "calibration": f"T={TEMPERATURE:.4f}" if TEMPERATURE != 1.0 else "none",
298
+ "thresholds": "per_class_optimized" if any(v != 0.3 for v in THRESHOLDS.values()) else "global_0.3",
299
+ },
300
  "overall_risk_score": overall_risk, "risk_level": risk_level,
301
  "safe_probability": round(safe_prob, 4), "num_vulnerabilities": len(vulns),
302
  "vulnerabilities": vulns, "attack_chain": chain, "suggested_fix": fix,
 
306
 
307
 
308
  def analyze_code(code):
309
+ if not code or not code.strip(): return "Please paste some code to analyze."
310
  data = build_json_report(code)
311
+ r = ["# Code Security Analysis Report\n"]
 
312
  r.append(f"**Language:** {data['language']}")
313
+ cls_status = "Trained v2 (GraphCodeBERT + ASL)" if data['model_status']['classifier'] == 'trained_v2' else "Base Model"
314
+ fix_status = "Trained v2 (CodeT5+ CWE-aware)" if data['model_status']['fix_generator'] == 'trained_v2' else "Base Model"
315
+ r.append(f"**Classifier:** {cls_status}")
316
+ r.append(f"**Fix Generator:** {fix_status}")
317
+ if data['model_status']['calibration'] != 'none':
318
+ r.append(f"**Calibration:** {data['model_status']['calibration']} | **Thresholds:** {data['model_status']['thresholds']}")
319
+ r.append("")
320
  if data['num_vulnerabilities'] == 0:
321
+ r.append("## No Vulnerabilities Detected")
322
  r.append(f"**Risk Score:** {data['overall_risk_score']}/100 | **Safe Confidence:** {data['safe_probability']:.1%}\n")
323
  r.append("Code appears safe. Always supplement with manual review and SAST tools.")
324
  return "\n".join(r)
 
325
  emoji = {"Critical":"🔴","High":"🟠","Medium":"🟡","Low":"🟢"}.get(data['risk_level'],"⚪")
326
  r.append(f"## {emoji} {data['num_vulnerabilities']} Vulnerability(ies) Detected\n")
327
  r.append(f"**Risk Score:** {data['overall_risk_score']}/100 ({data['risk_level']}) | **Safe Probability:** {data['safe_probability']:.1%}\n---\n")
 
328
  for i, v in enumerate(data['vulnerabilities'], 1):
329
  se = {"Critical":"🔴","High":"🟠","Medium":"🟡","Low":"🟢"}.get(v['severity'],"⚪")
330
  r.append(f"### {i}. {se} {v['name']}")
 
332
  r.append(f"| **CWE ID** | {v['cwe_id']} |")
333
  r.append(f"| **OWASP** | {v['owasp_category']} |")
334
  r.append(f"| **Severity** | {v['severity']} ({v['severity_score']}/100) |")
335
+ r.append(f"| **Confidence** | {v['detection_confidence']:.1%} (calibrated) |")
336
+ r.append(f"| **Threshold** | {v['threshold_used']:.3f} (per-class optimized) |")
337
  r.append(f"| **Exploit Likelihood** | {v['exploit_likelihood']}% |")
338
  r.append(f"\n**Why Dangerous:** {v['explanation']}\n")
 
339
  if data['attack_chain']:
340
+ r.append("---\n## Attack Chain\n")
341
  for s in data['attack_chain']:
342
  r.append(f"{s['step']}. **{s['phase']}** — {s['description']}")
343
+ r.append("\n---\n## Suggested Fix\n")
 
344
  if data['suggested_fix']:
345
  r.append(f"```{data['language'].lower()}\n{data['suggested_fix']}\n```")
346
  else:
347
  r.append("*Fix generation unavailable. Please review manually.*")
348
+ r.append("\n---\n*AI-generated report (v2: calibrated probabilities + per-class thresholds). Verify with manual review and SAST tools.*")
 
349
  return "\n".join(r)
350
 
351
 
 
354
  return build_json_report(code)
355
 
356
 
 
 
 
357
  EXAMPLES = [
358
+ ["""import sqlite3\n\ndef get_user(username):\n conn = sqlite3.connect('users.db')\n query = f"SELECT * FROM users WHERE username = '{username}'"\n return conn.execute(query).fetchone()\n"""],
359
+ ["""#include <stdio.h>\n#include <string.h>\n\nvoid process_input(char *user_input) {\n char buffer[64];\n strcpy(buffer, user_input);\n printf("Processed: %s\\n", buffer);\n}\n"""],
360
+ ["""const express = require('express');\nconst app = express();\n\napp.get('/search', (req, res) => {\n const query = req.query.q;\n res.send(`<h1>Results for: ${query}</h1>`);\n});\n"""],
361
+ ["""import requests, hashlib\n\nAPI_KEY = "sk-proj-abc123def456"\nDB_PASSWORD = "admin123"\n\ndef hash_password(password):\n return hashlib.md5(password.encode()).hexdigest()\n"""],
362
+ ["""import sqlite3\nfrom hashlib import sha256\nimport hmac, secrets\n\ndef get_user(username):\n conn = sqlite3.connect('users.db')\n conn.execute("SELECT * FROM users WHERE username = ?", (username,))\n return conn.fetchone()\n"""],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363
  ]
364
 
 
 
 
365
  with gr.Blocks(
366
+ title="Code Security Risk Analyzer v2",
367
  theme=gr.themes.Soft(),
368
  css=".gradio-container { max-width: 1200px; margin: auto; }",
369
  ) as demo:
370
  gr.Markdown("""
371
+ # 🔒 AI-Powered Code Security Risk Analyzer v2
372
+ ### Detect OWASP Top 10 & CWE vulnerabilities with calibrated confidence + per-class thresholds
373
 
374
  Paste code in Python, JavaScript, Java, C, C++, PHP, or Go.
375
 
376
+ **Models:** [GraphCodeBERT](https://huggingface.co/ayshajavd/graphcodebert-vuln-classifier) (detection, Macro F1=0.476) + [CodeT5+](https://huggingface.co/ayshajavd/codet5p-vuln-fixer) (fixes, BLEU=81.0) | **Dataset:** [175K samples](https://huggingface.co/datasets/ayshajavd/code-security-vulnerability-dataset)
377
+
378
+ **v2 Improvements:** Per-class threshold optimization | Temperature-calibrated probabilities | Asymmetric Loss training | GraphCodeBERT-base (125M params) | CodeT5+ 220M CWE-aware fixer
379
  """)
380
 
381
  with gr.Row():
 
396
  analyze_btn.click(fn=analyze_code, inputs=[code_input], outputs=[report_output], api_name="analyze")
397
  json_btn.click(fn=show_json, inputs=[code_input], outputs=[json_output])
398
 
 
399
  with gr.Row(visible=False):
400
  api_json_btn = gr.Button("get_json", visible=False)
401
  api_json_btn.click(fn=get_json_report, inputs=[code_input], outputs=[json_output], api_name="get_json_report")
 
405
  ### Python Client
406
  ```python
407
  from gradio_client import Client
 
408
  client = Client("ayshajavd/code-security-analyzer")
 
 
409
  report = client.predict(code="your code here", api_name="/analyze")
 
 
410
  json_report = client.predict(code="your code here", api_name="/get_json_report")
411
  ```
412
 
413
  ### cURL
414
  ```bash
415
+ curl -X POST https://ayshajavd-code-security-analyzer.hf.space/call/analyze \\
416
+ -H "Content-Type: application/json" -d '{"data": ["your code here"]}'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
417
  ```
418
  """)
419