# code-review-environment / openenv.yaml
# ashishbaberwal's picture
# Final Changes
# 344c3c9
# Manifest identity: package name, release version, and a prose summary.
name: code-review-agent-env
# Quoted so the version is always loaded as a string, never as a number.
version: "1.0.0"
# Literal block scalar (`|`): the body must be indented deeper than the key;
# its line breaks are preserved as written.
description: |
  A realistic code review environment where AI agents review pull requests,
  identify issues, suggest improvements, and make approval decisions.
  Models real-world software development workflows.
# Authors of this environment, one "Name <email>" entry per list item.
authors:
  - Ashish <ashishkbaberwal@gmail.com>
  - Shardul <shardulmd@gmail.com>
  - Harshit <shakyanitin807@gmail.com>
# Labels attached to this environment.
tags:
  - code-review
  - software-engineering
  - agent-evaluation
  - real-world-task

# License under which the project is released.
license: MIT
# Where the environment implementation lives. Both values name CodeReviewEnv:
# `class` as a dotted path, `entry_point` in "module:attribute" form.
# NOTE(review): nesting under `environment` is reconstructed from key order;
# confirm against the schema of the tool that loads this manifest.
environment:
  class: environment.env.CodeReviewEnv
  entry_point: environment.env:CodeReviewEnv
# Task catalogue. Each entry carries a unique `id`, a display `name`, a
# `difficulty` tier (easy / medium / hard), and a `grader` mapping.
# Every task below declares the same grader: type `deterministic`, endpoint
# `/score`.
# NOTE(review): `grader` is nested inside each task based on key order;
# confirm against the schema of the tool that loads this manifest.
tasks:
  # --- difficulty: easy ---
  - id: bug_detection_easy_1
    name: "Easy: Detect Division by Zero"
    difficulty: easy
    grader:
      type: deterministic
      endpoint: /score
  - id: bug_detection_easy_2
    name: "Easy: Off-by-One Error"
    difficulty: easy
    grader:
      type: deterministic
      endpoint: /score
  - id: approve_easy_3
    name: "Easy: Approve Safe Refactor"
    difficulty: easy
    grader:
      type: deterministic
      endpoint: /score

  # --- difficulty: medium ---
  - id: memory_leak_medium_1
    name: "Medium: Memory Leak Detection"
    difficulty: medium
    grader:
      type: deterministic
      endpoint: /score
  - id: performance_medium_2
    name: "Medium: String Concatenation Performance"
    difficulty: medium
    grader:
      type: deterministic
      endpoint: /score
  - id: approve_medium_3
    name: "Medium: Approve Safe Query Helper"
    difficulty: medium
    grader:
      type: deterministic
      endpoint: /score
  - id: type_safety_medium_4
    name: "Medium: Type Safety Optional Arithmetic"
    difficulty: medium
    grader:
      type: deterministic
      endpoint: /score
  - id: javascript_medium_5
    name: "Medium: JavaScript Undefined Access"
    difficulty: medium
    grader:
      type: deterministic
      endpoint: /score

  # --- difficulty: hard ---
  - id: security_hard_1
    name: "Hard: SQL Injection Vulnerability"
    difficulty: hard
    grader:
      type: deterministic
      endpoint: /score
  - id: race_condition_hard_2
    name: "Hard: Race Condition"
    difficulty: hard
    grader:
      type: deterministic
      endpoint: /score
  - id: approve_hard_3
    name: "Hard: Approve Thread-Safe Counter"
    difficulty: hard
    grader:
      type: deterministic
      endpoint: /score
  - id: adversarial_hard_4
    name: "Hard: Adversarial Safe SQL Builder"
    difficulty: hard
    grader:
      type: deterministic
      endpoint: /score
  - id: concurrency_hard_5
    name: "Hard: Async Await Misuse"
    difficulty: hard
    grader:
      type: deterministic
      endpoint: /score
  - id: dependency_injection_hard_6
    name: "Hard: Tight Coupling in Service"
    difficulty: hard
    grader:
      type: deterministic
      endpoint: /score
# Shape of what the agent observes: declared as a `dict`, with a prose
# description of its contents. The `|` body must be indented deeper than
# `description` so the text is captured by the block scalar.
observation_space:
  type: dict
  description: |
    Contains code diff, file context, and review status
# Shape of what the agent may submit: declared as a `dict`, with a prose
# description of the available actions. The `|` body must be indented deeper
# than `description` so the text is captured by the block scalar.
action_space:
  type: dict
  description: |
    Actions include adding comments, suggesting fixes, approving, or requesting changes
# Declared reward bounds. `min` and `max` belong inside this mapping; left at
# the top level they would leave `reward_range` null.
# NOTE(review): whether the bounds apply per step or per episode is not
# stated in this file; confirm with the environment implementation.
reward_range:
  min: -0.5
  max: 1.0

# Step limit per episode, as the key name states; enforcement is not shown
# in this file.
max_episode_steps: 50
# Names declared as required by this environment.
# NOTE(review): these look like environment-variable names; how they are
# read is not visible in this file.
requires_api_keys:
  - API_KEY
  - API_BASE_URL
  - MODEL_NAME