Spaces:

Samarthrr
/

revcode-ai-engine

Sleeping

App Files Files Community

Samarthrr commited on 25 days ago

Commit

96d9e55

verified ·

1 Parent(s): d6d6f14

Update app.py

Browse files

Files changed (1) hide show

app.py +82 -29

app.py CHANGED Viewed

@@ -30,21 +30,49 @@ class SecurityClassifier(nn.Module):
 class Guardrails:
     @staticmethod
     def validate(code: str):
         try:
             tree = ast.parse(code)
             for node in ast.walk(tree):
                 if isinstance(node, ast.FunctionDef):
                     if not node.name.islower() and "_" not in node.name:
-                        return False, f"Function '{node.name}' violates snake_case standards."
-            return True, "Valid"
         except Exception as e:
             return False, f"Syntax analysis failed: {str(e)}"
-# ---------------------------------------------------------
-# 3. GLOBAL MODEL HANDLERS (Lazy Loading)
-# ---------------------------------------------------------
-FIXER_MODEL = "Salesforce/codet5p-220m"
-SECURITY_MODEL = "distilbert-base-uncased"
 models = {
     "fixer": None,
@@ -54,20 +82,26 @@ models = {
 def load_fixer():
     if not models["fixer"]:
-        print("Loading CodeT5+ Fixer...")
-        models["tokenizers"]["fixer"] = RobertaTokenizer.from_pretrained(FIXER_MODEL)
-        models["fixer"] = T5ForConditionalGeneration.from_pretrained(FIXER_MODEL)
-    return models["fixer"], models["tokenizers"]["fixer"]
 def load_security():
     if not models["security"]:
-        print("Loading DistilBERT Guardian...")
-        models["tokenizers"]["security"] = DistilBertTokenizer.from_pretrained(SECURITY_MODEL)
-        # In a real app, we'd load fine-tuned weights here.
-        # For the demo, we use the base model with the classifier head.
-        models["security"] = SecurityClassifier()
-        models["security"].eval()
-    return models["security"], models["tokenizers"]["security"]
 # ---------------------------------------------------------
 # 4. API ENDPOINTS
@@ -78,6 +112,17 @@ class CodeInput(BaseModel):
 @app.post("/analyze")
 async def analyze_security(data: CodeInput):
     model, tokenizer = load_security()
     inputs = tokenizer(data.code, return_tensors="pt", truncation=True, padding=True)
     with torch.no_grad():
         logits = model(inputs['input_ids'], inputs['attention_mask'])
@@ -87,26 +132,34 @@ async def analyze_security(data: CodeInput):
     return {
         "is_vulnerable": vulnerability_prob > 0.5,
         "confidence": round(vulnerability_prob * 100, 2),
-        "verdict": "SECURE" if vulnerability_prob <= 0.5 else "VULNERABLE"
     }
 @app.post("/fix")
 async def fix_code(data: CodeInput):
     model, tokenizer = load_fixer()
-    input_text = f"Fix code: {data.code}"
-    inputs = tokenizer(input_text, return_tensors="pt", truncation=True)
-    with torch.no_grad():
-        outputs = model.generate(**inputs, max_length=512)
-    suggestion = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # Run Guardrails
-    is_valid, msg = Guardrails.validate(suggestion)
     return {
         "suggestion": suggestion,
-        "guardrail_status": "PASSED" if is_valid else "FAILED",
         "guardrail_msg": msg
     }

 class Guardrails:
     @staticmethod
     def validate(code: str):
+        findings = []
         try:
             tree = ast.parse(code)
             for node in ast.walk(tree):
+                # Check for naming conventions
                 if isinstance(node, ast.FunctionDef):
                     if not node.name.islower() and "_" not in node.name:
+                        findings.append(f"Function '{node.name}' should use snake_case.")
+                # Check for hardcoded secrets in assignments
+                if isinstance(node, ast.Assign):
+                    for target in node.targets:
+                        if isinstance(target, ast.Name):
+                            name = target.id.lower()
+                            if any(k in name for k in ['pk', 'secret', 'password', 'api_key', 'token']):
+                                if isinstance(node.value, ast.Constant) and isinstance(node.value.value, str):
+                                    findings.append(f"Potential hardcoded secret in variable '{target.id}'.")
+            if not findings:
+                return True, "Valid"
+            return False, " | ".join(findings)
         except Exception as e:
             return False, f"Syntax analysis failed: {str(e)}"
+# ... (rest of models and load functions remain same)
+@app.post("/verify")
+async def verify_fix(data: dict):
+    # Specialized verification endpoint for external engines
+    code = data.get("code", "")
+    is_valid, msg = Guardrails.validate(code)
+    return {
+        "is_valid": is_valid,
+        "message": msg,
+        "status": "PASSED" if is_valid else "WARNING"
+    }
+@app.post("/fix")
+async def fix_code(data: CodeInput):
+    model, tokenizer = load_fixer()
+    suggestion = data.code
+    # ... (existing fix code)
 models = {
     "fixer": None,
 def load_fixer():
     if not models["fixer"]:
+        try:
+            print("Loading CodeT5+ Fixer...")
+            models["tokenizers"]["fixer"] = RobertaTokenizer.from_pretrained(FIXER_MODEL)
+            models["fixer"] = T5ForConditionalGeneration.from_pretrained(FIXER_MODEL)
+        except Exception as e:
+            print(f"Failed to load fixer model: {e}. Falling back to Rule Engine.")
+            models["fixer"] = "RULE_ENGINE"
+    return models["fixer"], models["tokenizers"].get("fixer")
 def load_security():
     if not models["security"]:
+        try:
+            print("Loading DistilBERT Guardian...")
+            models["tokenizers"]["security"] = DistilBertTokenizer.from_pretrained(SECURITY_MODEL)
+            models["security"] = SecurityClassifier()
+            models["security"].eval()
+        except Exception as e:
+            print(f"Failed to load security model: {e}. Falling back to Heuristic Scan.")
+            models["security"] = "HEURISTIC"
+    return models["security"], models["tokenizers"].get("security")
 # ---------------------------------------------------------
 # 4. API ENDPOINTS
 @app.post("/analyze")
 async def analyze_security(data: CodeInput):
     model, tokenizer = load_security()
+    if model == "HEURISTIC":
+        # Rule-based fallback for security
+        is_vulnerable = "eval(" in data.code or "innerHTML" in data.code
+        return {
+            "is_vulnerable": is_vulnerable,
+            "confidence": 85.0 if is_vulnerable else 95.0,
+            "verdict": "VULNERABLE" if is_vulnerable else "SECURE",
+            "provider": "RuleEngine"
+        }
     inputs = tokenizer(data.code, return_tensors="pt", truncation=True, padding=True)
     with torch.no_grad():
         logits = model(inputs['input_ids'], inputs['attention_mask'])
     return {
         "is_vulnerable": vulnerability_prob > 0.5,
         "confidence": round(vulnerability_prob * 100, 2),
+        "verdict": "SECURE" if vulnerability_prob <= 0.5 else "VULNERABLE",
+        "provider": "DistilBERT"
     }
 @app.post("/fix")
 async def fix_code(data: CodeInput):
     model, tokenizer = load_fixer()
+    suggestion = data.code
+    if model == "RULE_ENGINE":
+        # Advanced Rule-based correction
+        suggestion = data.code.replace("eval(", "JSON.parse(").replace("console.log(", "// logger.info(")
+        status = "PASSED"
+        msg = "Rule-based fix applied (Model offline)"
+    else:
+        input_text = f"Fix code: {data.code}"
+        inputs = tokenizer(input_text, return_tensors="pt", truncation=True)
+        with torch.no_grad():
+            outputs = model.generate(**inputs, max_length=512)
+        suggestion = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        # Run Guardrails
+        is_valid, msg = Guardrails.validate(suggestion)
+        status = "PASSED" if is_valid else "FAILED"
     return {
         "suggestion": suggestion,
+        "guardrail_status": status,
         "guardrail_msg": msg
     }