# Spaces: DeepParmar / code-review (Sleeping)
# code-review / openenv.yaml
# changes: f8cc947, 7 days ago, 1.48 kB

	name: code-review-env
	version: "1.0.0"
	description: >
	A real-world code review environment where an AI agent identifies bugs in Python pull requests.
	The agent must find real bugs, avoid false positives, and not approve broken code.
	Includes a red herring in the hard task to test false positive resistance.
	author: Team Phoenix
	tags:
	- openenv
	- code-review
	- real-world
	- security
	- python

	tasks:
	- id: easy
	description: Find 3 bugs in a simple Python data processing function
	difficulty: easy
	max_steps: 8

	- id: medium
	description: Find 4 security vulnerabilities in a Python web API endpoint
	difficulty: medium
	max_steps: 15

	- id: hard
	description: Find 6 security and architectural bugs across 3 files in an async cryptographic service while avoiding a red herring
	difficulty: hard
	max_steps: 25

	observation_space:
	type: object
	fields:
	task_id: str
	language: str
	pr_title: str
	pr_description: str
	code_diff: str
	full_file: str
	existing_comments: list
	step_number: int
	max_steps: int
	review_status: str

	action_space:
	operations:
	- add_comment
	- approve
	- request_changes
	- done
	- inspect_file
	- inspect_lines
	fields:
	line_number: int (required for add_comment)
	severity: str (critical\|major\|minor\|nit)
	category: str (bug\|security\|performance\|style)
	message: str
	summary: str (required for approve and request_changes)