Nitishkumar-ai committed on
Commit
8f4e44a
·
1 Parent(s): 6398066

Feat (Phase 1 & 2): Extract scanner module and add CLI interface

Browse files
commitguard_env/cli.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import json
3
+ import subprocess
4
+ import sys
5
+ from dataclasses import asdict
6
+ from pathlib import Path
7
+
8
+ from .scanner import CommitGuardScanner
9
+
10
+
11
+ def cmd_scan(args):
12
+ diff_text = ""
13
+ if getattr(args, "diff", None):
14
+ diff_text = Path(args.diff).read_text(encoding="utf-8")
15
+ elif getattr(args, "staged", False):
16
+ diff_text = subprocess.check_output(["git", "diff", "--staged"], text=True)
17
+ elif getattr(args, "commit", None):
18
+ diff_text = subprocess.check_output(["git", "show", args.commit], text=True)
19
+ elif getattr(args, "pr", None):
20
+ diff_text = subprocess.check_output(["gh", "pr", "diff", args.pr], text=True)
21
+ else:
22
+ print("Must specify one of --diff, --staged, --commit, or --pr")
23
+ sys.exit(1)
24
+
25
+ if not diff_text.strip():
26
+ print("No diff found to scan.")
27
+ sys.exit(0)
28
+
29
+ print(f"Loading model ({args.model})...", file=sys.stderr)
30
+ scanner = CommitGuardScanner(model_path=args.model, is_lora=args.is_lora, base_model=args.base_model)
31
+
32
+ print(f"Scanning diff ({len(diff_text)} chars)...", file=sys.stderr)
33
+ result = scanner.scan(diff_text)
34
+
35
+ if args.format == "json":
36
+ print(json.dumps(asdict(result), indent=2))
37
+ elif args.format == "text":
38
+ status = "VULNERABLE ⚠️" if result.is_vulnerable else "SAFE ✅"
39
+ print(f"\nVerdict: {status}")
40
+ if result.is_vulnerable:
41
+ print(f"CWE: {result.cwe}")
42
+ print(f"Exploit Sketch:\n {result.exploit_sketch}")
43
+ if result.parse_error:
44
+ print(f"\nParser Warning: {result.parse_error}")
45
+ elif args.format == "sarif":
46
+ # Minimal SARIF output stub
47
+ print("SARIF format not fully implemented yet.", file=sys.stderr)
48
+ print(json.dumps(asdict(result)))
49
+
50
+ if args.fail_on_vulnerable and result.is_vulnerable:
51
+ sys.exit(1)
52
+
53
+
54
+ def cmd_server(args):
55
+ from .server import main as server_main
56
+ server_main()
57
+
58
+
59
+ def cmd_eval(args):
60
+ # This is a bit hacky to reuse the script without modifying sys.path everywhere
61
+ # A cleaner approach would be moving evaluate.py into commitguard_env
62
+ REPO_ROOT = Path(__file__).resolve().parent.parent
63
+ eval_script = REPO_ROOT / "scripts" / "evaluate.py"
64
+
65
+ cmd = [sys.executable, str(eval_script)]
66
+ cmd.extend(args.eval_args)
67
+ subprocess.run(cmd, check=True)
68
+
69
+
70
+ def main():
71
+ parser = argparse.ArgumentParser(description="CommitGuard AI-paced security review")
72
+ subparsers = parser.add_subparsers(dest="command", required=True)
73
+
74
+ # 'scan' subcommand
75
+ scan_parser = subparsers.add_parser("scan", help="Scan a code diff for vulnerabilities")
76
+
77
+ source_group = scan_parser.add_mutually_exclusive_group(required=True)
78
+ source_group.add_argument("--diff", type=str, help="Path to a diff file")
79
+ source_group.add_argument("--staged", action="store_true", help="Scan git staged changes")
80
+ source_group.add_argument("--commit", type=str, help="Scan a specific git commit (e.g., HEAD)")
81
+ source_group.add_argument("--pr", type=str, help="Scan a GitHub PR URL or ID (requires gh cli)")
82
+
83
+ scan_parser.add_argument("--model", type=str, default="inmodel-labs/commitguard-llama-3b", help="Model path or HF ID")
84
+ scan_parser.add_argument("--base-model", type=str, default=None, help="Base model if using LoRA")
85
+ scan_parser.add_argument("--is-lora", action="store_true", help="Whether the model is a LoRA adapter")
86
+ scan_parser.add_argument("--format", choices=["text", "json", "sarif"], default="text", help="Output format")
87
+ scan_parser.add_argument("--fail-on-vulnerable", action="store_true", help="Exit with code 1 if vulnerable")
88
+
89
+ # 'server' subcommand
90
+ server_parser = subparsers.add_parser("server", help="Start the OpenEnv environment server")
91
+ # server_main takes PORT from environment
92
+
93
+ # 'eval' subcommand
94
+ eval_parser = subparsers.add_parser("eval", help="Run the evaluation harness")
95
+ eval_parser.add_argument("eval_args", nargs=argparse.REMAINDER, help="Arguments passed to evaluate.py")
96
+
97
+ args = parser.parse_args()
98
+
99
+ if args.command == "scan":
100
+ cmd_scan(args)
101
+ elif args.command == "server":
102
+ cmd_server(args)
103
+ elif args.command == "eval":
104
+ cmd_eval(args)
105
+
106
+ if __name__ == "__main__":
107
+ main()
commitguard_env/inference.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from __future__ import annotations

import sys
from pathlib import Path
from typing import Any

# Add project root to path for imports to find agent_prompt if run directly
REPO_ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(REPO_ROOT))

# Prefer the canonical prompt from the repo-level agent_prompt module; fall
# back to an inlined copy so this module still works when installed standalone.
# NOTE(review): the two copies can drift — confirm they are kept in sync.
try:
    from agent_prompt import SYSTEM_PROMPT
except ImportError:
    # Fallback if not found
    SYSTEM_PROMPT = """You are a senior security researcher and pentester. Your task is to analyze code commits (diffs) to determine if they introduce exploitable vulnerabilities.

You operate in a multi-step environment (up to 5 steps). You can request more context, analyze your thoughts, or issue a final verdict.

### Action Format
You MUST respond with exactly ONE action per turn, wrapped in XML tags:

1. **Request Context:** Use this if you need to see the full content of a file listed in 'available_files'.
<action>
<action_type>request_context</action_type>
<file_path>filename.c</file_path>
</action>

2. **Analyze:** Use this for your internal Chain-of-Thought reasoning. Be detailed.
<action>
<action_type>analyze</action_type>
<reasoning>Your detailed step-by-step security analysis here...</reasoning>
</action>

3. **Verdict:** Use this to terminate the episode with your final judgment.
<action>
<action_type>verdict</action_type>
<is_vulnerable>true/false</is_vulnerable>
<vuln_type>CWE-XX (e.g., CWE-89)</vuln_type>
<exploit_sketch>Brief description of how this could be exploited...</exploit_sketch>
</action>

### Rules & Constraints
- If the code is safe, set is_vulnerable to false and vuln_type to NONE.
- Be specific in exploit_sketch: name the attack vector (e.g., buffer overflow via unchecked memcpy).
- Common CWE types: CWE-89 (SQLi), CWE-79 (XSS), CWE-78 (Command Inj), CWE-22 (Path Traversal), CWE-119 (Buffer Overflow), CWE-476 (Null Dereference), CWE-190 (Integer Overflow).
- You have a maximum of 5 steps per episode.
- Context requests have a small cost; be efficient.
- Verifiable rewards (RLVR) are based on the accuracy of your final verdict and the presence of correct exploit keywords.
"""
50
+
51
+
52
+ def format_prompt(diff: str, available_files: list[str] = None) -> str:
53
+ """Format the diff into the expected model prompt."""
54
+ files_str = ", ".join(available_files) if available_files else "None"
55
+
56
+ user_prompt = f"""### Input Diff
57
+ {diff}
58
+
59
+ ### Environment Info
60
+ - Available Files: {files_str}
61
+ - Current Step: 0/5
62
+
63
+ Please provide your next action in XML format:"""
64
+
65
+ return (
66
+ f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
67
+ f"{SYSTEM_PROMPT}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n"
68
+ f"{user_prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
69
+ )
70
+
71
+ def load_model(model_path: str, is_lora: bool = False, base_model: str = None) -> tuple[Any, Any]:
72
+ """
73
+ Load the LLM and tokenizer for inference.
74
+ """
75
+ import torch
76
+
77
+ if is_lora:
78
+ if not base_model:
79
+ raise ValueError("base_model is required if is_lora=True")
80
+ from unsloth import FastLanguageModel
81
+ from peft import PeftModel
82
+
83
+ model, tokenizer = FastLanguageModel.from_pretrained(
84
+ model_name=base_model,
85
+ max_seq_length=2048,
86
+ load_in_4bit=True,
87
+ )
88
+ model = PeftModel.from_pretrained(model, model_path)
89
+ FastLanguageModel.for_inference(model)
90
+ else:
91
+ from transformers import AutoModelForCausalLM, AutoTokenizer
92
+
93
+ device_map = "auto" if torch.cuda.is_available() else None
94
+ model = AutoModelForCausalLM.from_pretrained(
95
+ model_path,
96
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
97
+ device_map=device_map
98
+ )
99
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
100
+
101
+ return model, tokenizer
102
+
103
+ def generate(model: Any, tokenizer: Any, prompt: str, max_new_tokens: int = 256) -> str:
104
+ import torch
105
+ device = "cuda" if torch.cuda.is_available() else "cpu"
106
+
107
+ inputs = tokenizer(prompt, return_tensors="pt").to(device)
108
+
109
+ with torch.no_grad():
110
+ output = model.generate(
111
+ **inputs,
112
+ max_new_tokens=max_new_tokens,
113
+ temperature=0.1,
114
+ do_sample=False,
115
+ )
116
+
117
+ response = tokenizer.decode(output[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
118
+ return response
commitguard_env/models.py CHANGED
@@ -59,3 +59,12 @@ class DevignSample:
59
  target_file: Optional[str] = None
60
  files: Optional[dict[str, str]] = None
61
 
 
 
 
 
 
 
 
 
 
 
59
  target_file: Optional[str] = None
60
  files: Optional[dict[str, str]] = None
61
 
62
+
63
@dataclass(frozen=True, slots=True)
class ScanResult:
    """Immutable result of scanning a single diff for vulnerabilities."""

    # Final verdict: True when the model judged the diff exploitable.
    is_vulnerable: bool
    # CWE identifier reported by the model (e.g. "CWE-89"); may be None.
    cwe: Optional[str]
    # Short description of how the flaw could be exploited, when vulnerable.
    exploit_sketch: Optional[str]
    # Raw model response text, kept for debugging and auditing.
    raw_response: str
    # Set when the model output could not be parsed into a clean action.
    parse_error: Optional[str] = None
+ parse_error: Optional[str] = None
70
+
commitguard_env/scanner.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ from .inference import format_prompt, generate, load_model
6
+ from .models import ScanResult
7
+ from .parse_action import parse_action
8
+
9
+
10
+ class CommitGuardScanner:
11
+ """
12
+ Scanner for CommitGuard vulnerabilities.
13
+ Keeps the model in memory to allow fast scanning of multiple diffs.
14
+ """
15
+
16
+ def __init__(self, model_path: str = "inmodel-labs/commitguard-llama-3b", is_lora: bool = False, base_model: str = None) -> None:
17
+ self.model_path = model_path
18
+ self.is_lora = is_lora
19
+ self.base_model = base_model
20
+ self.model: Any = None
21
+ self.tokenizer: Any = None
22
+
23
+ def load(self) -> None:
24
+ """Load the model and tokenizer into memory."""
25
+ if self.model is None or self.tokenizer is None:
26
+ self.model, self.tokenizer = load_model(self.model_path, self.is_lora, self.base_model)
27
+
28
+ def scan(self, diff: str, available_files: list[str] = None) -> ScanResult:
29
+ """
30
+ Scan a given diff for vulnerabilities.
31
+ """
32
+ self.load()
33
+
34
+ prompt = format_prompt(diff, available_files)
35
+ response = generate(self.model, self.tokenizer, prompt)
36
+ action = parse_action(response)
37
+
38
+ # Map to ScanResult
39
+ return ScanResult(
40
+ is_vulnerable=action.is_vulnerable if action.is_vulnerable is not None else False,
41
+ cwe=action.vuln_type,
42
+ exploit_sketch=action.exploit_sketch,
43
+ raw_response=response,
44
+ parse_error=action.parse_error
45
+ )
46
+
47
+
48
+ def scan(diff: str, model_path: str = "inmodel-labs/commitguard-llama-3b", is_lora: bool = False, base_model: str = None) -> ScanResult:
49
+ """
50
+ Convenience method to scan a single diff. Loads the model, scans, and returns the result.
51
+ If scanning multiple diffs, prefer instantiating CommitGuardScanner directly to avoid reloading the model.
52
+ """
53
+ scanner = CommitGuardScanner(model_path=model_path, is_lora=is_lora, base_model=base_model)
54
+ return scanner.scan(diff)
pyproject.toml CHANGED
@@ -33,6 +33,7 @@ train = [
33
  ]
34
 
35
  [project.scripts]
 
36
  server = "commitguard_env.server:main"
37
 
38
  [tool.setuptools]
 
33
  ]
34
 
35
  [project.scripts]
36
+ commitguard = "commitguard_env.cli:main"
37
  server = "commitguard_env.server:main"
38
 
39
  [tool.setuptools]