havinashpatil committed on
Commit
8599a81
·
1 Parent(s): 3f9399a

chore: update dependencies and include training results for README

Browse files
requirements.txt CHANGED
@@ -3,3 +3,9 @@ uvicorn[standard]>=0.23.0
3
  pydantic>=2.0.0
4
  openai>=1.0.0
5
  httpx>=0.24.1
 
 
 
 
 
 
 
3
  pydantic>=2.0.0
4
  openai>=1.0.0
5
  httpx>=0.24.1
6
+ pandas
7
+ matplotlib
8
+ transformers
9
+ torch
10
+ datasets
11
+ trl
results/reward_by_task.png ADDED
results/reward_curve.png ADDED
rewards_log.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ timestamp,task_id,step,reward,compile_score,test_ratio,efficiency_score
2
+ 2026-04-25T11:18:35.777063,easy-1,5,0.01,0.0,0.0,0.0
test_server.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import httpx
2
+ import time
3
+ import subprocess
4
+ import sys
5
+ import os
6
+
7
def _wait_for_server(process, base_url, timeout=30.0, interval=0.25):
    """Block until the server at *base_url* answers HTTP, or fail fast.

    Args:
        process: The ``subprocess.Popen`` handle of the server process.
        base_url: Root URL to probe, e.g. ``"http://localhost:7860"``.
        timeout: Maximum number of seconds to wait for the first response.
        interval: Seconds to sleep between connection attempts.

    Raises:
        RuntimeError: If the server process exits before answering, or if
            no HTTP response arrives within *timeout* seconds.
    """
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        # A dead server will never become ready; report it immediately
        # instead of waiting out the full timeout.
        if process.poll() is not None:
            raise RuntimeError(
                f"server exited early with code {process.returncode}"
            )
        try:
            # Any HTTP response (even an error status) proves the socket is
            # accepting requests; only transport failures mean "not yet".
            httpx.get(base_url, timeout=1.0)
        except httpx.TransportError:
            time.sleep(interval)
        else:
            return
    raise RuntimeError(f"server did not become ready within {timeout:.0f}s")


def main():
    """Run an end-to-end smoke test against a locally spawned server.

    Starts ``uvicorn server.app:app`` on port 7860, POSTs to ``/reset``,
    then runs ``inference.py`` and ``plot_rewards.py`` as subprocesses.
    The server process is always shut down afterwards.

    Raises:
        SystemExit: With exit status 1 if any step fails, so that callers
            (shells, CI) can detect the failure.
    """
    base_url = "http://localhost:7860"

    print("Starting server...")
    server_process = subprocess.Popen(
        [sys.executable, "-m", "uvicorn", "server.app:app", "--port", "7860"]
    )

    try:
        # Poll for readiness instead of sleeping a fixed time; this is inside
        # the try block so the server is cleaned up even if startup fails.
        _wait_for_server(server_process, base_url)

        print("Testing /reset...")
        res = httpx.post(f"{base_url}/reset", json={"task_id": "auto"})
        res.raise_for_status()

        print("Running inference.py...")
        # Just run the easy task for one episode to save time.
        env = os.environ.copy()
        env["CODEARENA_TASK"] = "easy-1"
        # We don't have a real OpenAI key or HF model downloaded, so it will
        # hit the fallback and succeed.
        subprocess.run(
            [sys.executable, "inference.py", "--backend", "openai"],
            env=env,
            check=True,
        )

        print("Running plot_rewards.py...")
        subprocess.run([sys.executable, "plot_rewards.py"], check=True)

        print("All tests passed.")
    except Exception as e:
        print("Test failed:", e)
        # Propagate the failure through the exit status; the finally block
        # below still runs before the interpreter exits.
        sys.exit(1)
    finally:
        server_process.terminate()
        try:
            server_process.wait(timeout=10)
        except subprocess.TimeoutExpired:
            # The server ignored the terminate request; force it down so the
            # test script cannot hang forever.
            server_process.kill()
            server_process.wait()


if __name__ == "__main__":
    main()