Create policy_engine.py
Browse files
learning_hub/policy_engine.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# learning_hub/policy_engine.py
|
| 2 |
+
from .schemas import ReflectorOutput
|
| 3 |
+
import json
|
| 4 |
+
|
| 5 |
+
class PolicyEngine:
    """Gatekeeper for learning deltas and pre-trade safety checks.

    Two responsibilities are visible here:
      * Deciding whether a Reflector-proposed "delta" is auto-accepted or
        routed to human review (``get_delta_acceptance``).
      * Checking a trade decision against top-level safety rules, the
        "meta-policy" (``check_meta_policy_on_decision``).
    """

    def __init__(self):
        # 1. Delta-acceptance policy (point 5): a delta whose score reaches
        #    this confidence threshold is accepted without human review.
        self.auto_accept_threshold: float = 0.80

        # 2. Human-review threshold (point 7): if the trade PnL (in percent)
        #    is below this value, the delta is sent to human review.
        self.human_review_loss_threshold_percent: float = -3.0

        # Maximum allowed initial stop-loss distance, as a percent of the
        # current price. Mirrors the MAX_LOSS_PER_TRADE rule text below.
        self.max_stop_loss_percent: float = 10.0

        # 3. Safety rules (meta-policy, point 7).
        # NOTE: these values are user-facing Arabic messages returned to
        # callers at runtime — they must not be translated or altered.
        self.admissibility_rules = {
            "NO_SHORT_TRADES": "النظام لا يدعم صفقات SHORT.",
            "MAX_LOSS_PER_TRADE": "يجب ألا يتجاوز وقف الخسارة الأولي 10%."
        }
        print("✅ Learning Hub Module: Policy Engine loaded")

    def get_delta_acceptance(self, reflector_output: "ReflectorOutput", trade_pnl_percent: float) -> "tuple[bool, str]":
        """Decide whether a proposed delta is auto-accepted or needs review.

        (Implements points 5 and 7.)

        Args:
            reflector_output: Reflector result; only its ``confidence``
                attribute is read here.
            trade_pnl_percent: Realized trade PnL in percent (negative = loss).

        Returns:
            ``(approved, reason)`` — ``approved`` is True only when the delta
            is auto-accepted; ``reason`` is a human-readable explanation.
        """
        # 1. Human-review policy (point 7): a significant loss always goes
        #    to a human, regardless of the Reflector's confidence.
        if trade_pnl_percent < self.human_review_loss_threshold_percent:
            reason = f"Human Review Required: Significant loss ({trade_pnl_percent:.2f}%)"
            print(f"⚠️ [PolicyEngine] {reason}")
            return False, reason  # (approved=False)

        # 2. Auto-accept policy (point 5).
        reflector_confidence = reflector_output.confidence

        # (Simplified scoring — for now the delta score is the Reflector's
        # confidence alone; a weighted formula may replace this later.)
        # delta_score = 0.5 * reflector_confidence + ...
        delta_score = reflector_confidence

        if delta_score >= self.auto_accept_threshold:
            reason = f"Auto-Accepted: High confidence ({delta_score:.2f} >= {self.auto_accept_threshold})"
            print(f"✅ [PolicyEngine] {reason}")
            return True, reason  # (approved=True)
        else:
            reason = f"Human Review Required: Low confidence ({delta_score:.2f} < {self.auto_accept_threshold})"
            print(f"ℹ️ [PolicyEngine] {reason}")
            return False, reason  # (approved=False)

    def check_meta_policy_on_decision(self, decision: dict) -> "tuple[bool, str]":
        """Check a decision (before execution) against top-level safety rules.

        (Implements point 7.)

        Args:
            decision: Decision payload; the keys ``"trade_type"``,
                ``"action"``, ``"current_price"`` and ``"stop_loss"`` are
                read when present.

        Returns:
            ``(admissible, reason)`` — False plus a rule message when a
            meta-policy rule is violated, otherwise ``(True, "Meta-Policy
            Passed")``.
        """
        # 1. "No SHORT trades" rule.
        if decision.get("trade_type") == "SHORT":
            return False, self.admissibility_rules["NO_SHORT_TRADES"]

        # 2. "Maximum initial stop-loss" rule — applies to BUY decisions only.
        action = decision.get("action")
        if action == "BUY":
            current_price = decision.get("current_price", 1)  # supplied by the caller
            stop_loss = decision.get("stop_loss", 0)

            # Guard against missing/zero values before computing a percentage.
            if current_price > 0 and stop_loss > 0:
                loss_percent = ((current_price - stop_loss) / current_price) * 100
                if loss_percent > self.max_stop_loss_percent:
                    reason = f"Meta-Policy Violation: Stop loss ({loss_percent:.1f}%) exceeds max ({self.max_stop_loss_percent:.0f}%)"
                    return False, reason

        # All checks passed.
        return True, "Meta-Policy Passed"
|