chq1155 committed on
Commit
ee6da62
·
verified ·
1 Parent(s): e32d6dc

Upload TD3B code (inference, training, baselines)

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ scoring/functions/classifiers/permeability-xgboost.json filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,128 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # TD3B: Transition-Directed Discrete Diffusion for Allosteric Binder Generation
2
+
3
+ TD3B is a sequence-based generative framework that designs peptide binders with specified agonist or antagonist behavior. It combines a Direction Oracle, a soft binding-affinity gate, and amortized fine-tuning of a pre-trained discrete diffusion model (MDLM).
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ conda env create -f env.yml
9
+ conda activate td3b
10
+ pip install -e .
11
+ ```
12
+
13
+ ## Data and Checkpoints
14
+
15
+ Download the pretrained checkpoints and data from [Google Drive (TBA)](placeholder_link).
16
+
17
+ Place the files as follows:
18
+
19
+ ```
20
+ TD3B/
21
+ ├── checkpoints/
22
+ │ ├── pretrained.ckpt # Pre-trained MDLM weights
23
+ │ ├── td3b.ckpt # Fine-tuned TD3B model
24
+ │ └── direction_oracle.pt # Direction Oracle weights
25
+ ├── data/
26
+ │ ├── train.csv # Training set (target-binder pairs)
27
+ │ └── test.csv # Test set
28
+ ├── scoring/functions/classifiers/
29
+ │ ├── binding-affinity.pt
30
+ │ ├── hemolysis-xgboost.json
31
+ │ ├── nonfouling-xgboost.json
32
+ │ ├── permeability-xgboost.json
33
+ │ └── solubility-xgboost.json
34
+ └── tokenizer/
35
+ ├── new_vocab.txt
36
+ └── new_splits.txt
37
+ ```
38
+
39
+ ## Code Structure
40
+
41
+ ```
42
+ TD3B/
43
+ ├── inference.py # Generate binders (main inference entry point)
44
+ ├── finetune_multi_target.py # Multi-target TD3B training
45
+ ├── finetune_utils.py # Training utilities
46
+ ├── launch_multi_target.sh # Training launcher script
47
+ ├── diffusion.py # MDLM backbone (TR2-D2)
48
+ ├── roformer.py # RoFormer wrapper
49
+ ├── noise_schedule.py # Noise schedules
50
+ ├── peptide_mcts.py # MCTS tree search
51
+ ├── td3b/
52
+ │ ├── direction_oracle.py # Direction Oracle (f_φ)
53
+ │ ├── td3b_scoring.py # Gated reward R = g_ψ · σ(d*·(f_φ−0.5)/τ)
54
+ │ ├── td3b_losses.py # L_WDCE + λ·L_ctr + β·L_KL
55
+ │ ├── td3b_mcts.py # TD3B-extended MCTS
56
+ │ ├── td3b_finetune.py # Training loop
57
+ │ └── data_utils.py # Data loading utilities
58
+ ├── scoring/ # Affinity predictor (g_ψ) and property classifiers
59
+ ├── baselines/ # CG, SMC, TDS, PepTune, Unguided baselines
60
+ ├── tokenizer/ # SMILES tokenizer (vocab + splits)
61
+ ├── configs/ # Model and training configs
62
+ └── utils/ # Misc utilities
63
+ ```
64
+
65
+ ## Inference
66
+
67
+ Generate agonist/antagonist binders for target proteins:
68
+
69
+ ```bash
70
+ python inference.py \
71
+ --ckpt_path checkpoints/td3b.ckpt \
72
+ --val_csv data/test.csv \
73
+ --save_path results/ \
74
+ --seed 42 \
75
+ --num_pool 32 \
76
+ --val_samples_per_target 8 \
77
+ --resample_alpha 0.1
78
+ ```
79
+
80
+ This generates 32 candidates per (target, direction), scores them with the Direction Oracle and affinity predictor, applies Algorithm 2 weighted resampling, and saves only valid peptide samples.
81
+
82
+ Output: `results/td3b_results_seed42.csv` with columns: target, sequence, direction, affinity, gated_reward, direction_oracle, direction_accuracy.
83
+
84
+ ## Training
85
+
86
+ ### Multi-target TD3B
87
+
88
+ 1. Edit `launch_multi_target.sh` — set paths to checkpoints, data, and oracle:
89
+
90
+ ```bash
91
+ BASE_PATH="/path/to/TD3B"
92
+ PRETRAINED_CHECKPOINT="${BASE_PATH}/checkpoints/pretrained.ckpt"
93
+ TRAIN_CSV="${BASE_PATH}/data/train.csv"
94
+ ORACLE_CKPT="${BASE_PATH}/checkpoints/direction_oracle.pt"
95
+ ```
96
+
97
+ 2. Launch training:
98
+
99
+ ```bash
100
+ bash launch_multi_target.sh
101
+ ```
102
+
103
+ Key hyperparameters (in `launch_multi_target.sh`):
104
+ - `CONTRASTIVE_WEIGHT=0.1` — λ for L_ctr
105
+ - `KL_BETA=0.1` — β for L_KL
106
+ - `SIGMOID_TEMPERATURE=0.1` — τ for gated reward
107
+ - `NUM_ITER=20` — MCTS iterations per round
108
+ - `NUM_CHILDREN=16` — Children per MCTS expansion
109
+
110
+ ### Baselines
111
+
112
+ Run baseline methods (CG, SMC, TDS, PepTune, Unguided):
113
+
114
+ ```bash
115
+ cd baselines/
116
+ bash run.sh --baseline cg --device cuda:0
117
+ bash run.sh --baseline smc --device cuda:0
118
+ bash run.sh --baseline tds --device cuda:0
119
+ ```
120
+
121
+ ## Citation
122
+
123
+ ```bibtex
124
+ @article{caotd3b,
125
+ title={TD3B: Transition-Directed Discrete Diffusion for Allosteric Binder Generation},
126
+ author={Cao, Hanqun and Pal, Aastha and Tang, Sophia and Zhang, Yinuo and Zhang, Jingjie and Heng, Pheng-Ann and Chatterjee, Pranam}
127
+ }
128
+ ```
baselines/__init__.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from baselines.baselines import (
2
+ RewardInputs,
3
+ RewardWrapper,
4
+ classifier_guidance,
5
+ peptune_mctg_sampling,
6
+ unguided_sampling,
7
+ sequential_monte_carlo,
8
+ twisted_diffusion_sampler,
9
+ )
10
+
11
+ __all__ = [
12
+ "RewardInputs",
13
+ "RewardWrapper",
14
+ "classifier_guidance",
15
+ "peptune_mctg_sampling",
16
+ "unguided_sampling",
17
+ "sequential_monte_carlo",
18
+ "twisted_diffusion_sampler",
19
+ ]
baselines/baselines.py ADDED
@@ -0,0 +1,746 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import math
3
+ from dataclasses import dataclass
4
+ from types import SimpleNamespace
5
+ from typing import Callable, Dict, Optional, Tuple
6
+
7
+ import numpy as np
8
+ import torch
9
+ import torch.nn.functional as F
10
+
11
+
12
+ DEFAULT_EPS = 1e-5
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ def _sample_categorical(categorical_probs: torch.Tensor) -> torch.Tensor:
17
+ gumbel = 1e-10 - (torch.rand_like(categorical_probs) + 1e-10).log()
18
+ return (categorical_probs / gumbel).argmax(dim=-1).to(dtype=torch.long)
19
+
20
+
21
+ def _normalize_probs(probs: torch.Tensor, dim: int = -1) -> torch.Tensor:
22
+ return probs / probs.sum(dim=dim, keepdim=True).clamp_min(1e-12)
23
+
24
+
25
+ def _safe_resample_weights(weights: torch.Tensor) -> torch.Tensor:
26
+ if weights.numel() == 0:
27
+ return weights
28
+ weights = torch.where(torch.isfinite(weights), weights, torch.zeros_like(weights))
29
+ total = weights.sum()
30
+ if not torch.isfinite(total) or total <= 0:
31
+ return torch.full_like(weights, 1.0 / weights.numel())
32
+ return weights / total
33
+
34
+
35
+ def _sequence_logprob(
36
+ probs: torch.Tensor,
37
+ x_next: torch.Tensor,
38
+ x_current: torch.Tensor,
39
+ mask_idx: int,
40
+ ) -> torch.Tensor:
41
+ gather = probs.gather(-1, x_next.unsqueeze(-1)).squeeze(-1).clamp_min(1e-12)
42
+ mask = (x_current == mask_idx).to(gather.dtype)
43
+ return (gather.log() * mask).sum(dim=-1)
44
+
45
+
46
+ def _transition_probs_from_logits(
47
+ log_probs: torch.Tensor,
48
+ t: torch.Tensor,
49
+ dt: torch.Tensor,
50
+ mask_idx: int,
51
+ ) -> torch.Tensor:
52
+ change_prob_t = t[:, None, None]
53
+ change_prob_s = (t - dt)[:, None, None]
54
+ q_xs = log_probs.exp() * (change_prob_t - change_prob_s)
55
+ q_xs[:, :, mask_idx] = change_prob_s[:, :, 0]
56
+ return q_xs
57
+
58
+
59
+ def _sample_from_q(
60
+ q_probs: torch.Tensor,
61
+ x_current: torch.Tensor,
62
+ mask_idx: int,
63
+ ) -> torch.Tensor:
64
+ x_changed = _sample_categorical(q_probs)
65
+ copy_flag = (x_current != mask_idx)
66
+ return torch.where(copy_flag, x_current, x_changed)
67
+
68
+
69
+ def _protein_tokens_to_device(tokens: torch.Tensor, device: torch.device) -> torch.Tensor:
70
+ if tokens.device != device:
71
+ return tokens.to(device)
72
+ return tokens
73
+
74
+
75
+ def _tokens_to_one_hot(tokens: torch.Tensor, vocab_size: int) -> torch.Tensor:
76
+ return F.one_hot(tokens, num_classes=vocab_size).float()
77
+
78
+
79
+ def _decode_sequences(tokenizer, token_ids: torch.Tensor) -> list:
80
+ return tokenizer.batch_decode(token_ids)
81
+
82
+
83
+ def _affinity_from_scoring(
84
+ scoring_fn: Callable,
85
+ sequences: list,
86
+ device: torch.device,
87
+ protein_seq: Optional[str] = None,
88
+ ) -> torch.Tensor:
89
+ if protein_seq is not None:
90
+ try:
91
+ scores = scoring_fn(sequences, protein_seq)
92
+ except TypeError:
93
+ try:
94
+ scores = scoring_fn(sequences, prot_seq=protein_seq)
95
+ except TypeError:
96
+ scores = scoring_fn(sequences)
97
+ else:
98
+ scores = scoring_fn(sequences)
99
+ if isinstance(scores, tuple):
100
+ scores = scores[0]
101
+ scores = np.asarray(scores)
102
+ if scores.ndim == 1:
103
+ affinity = scores
104
+ else:
105
+ affinity = scores[:, 0]
106
+ return torch.as_tensor(affinity, device=device, dtype=torch.float32)
107
+
108
+
109
+ def _roformer_hidden_from_inputs(
110
+ base_model,
111
+ input_ids: Optional[torch.Tensor] = None,
112
+ inputs_embeds: Optional[torch.Tensor] = None,
113
+ attn_mask: Optional[torch.Tensor] = None,
114
+ ) -> torch.Tensor:
115
+ outputs = base_model.backbone.model(
116
+ input_ids=input_ids,
117
+ inputs_embeds=inputs_embeds,
118
+ attention_mask=attn_mask,
119
+ output_hidden_states=True,
120
+ return_dict=True,
121
+ )
122
+ return outputs.hidden_states[-1]
123
+
124
+
125
+ def _logits_from_inputs(
126
+ base_model,
127
+ input_ids: Optional[torch.Tensor] = None,
128
+ inputs_embeds: Optional[torch.Tensor] = None,
129
+ attn_mask: Optional[torch.Tensor] = None,
130
+ ) -> torch.Tensor:
131
+ outputs = base_model.backbone.model(
132
+ input_ids=input_ids,
133
+ inputs_embeds=inputs_embeds,
134
+ attention_mask=attn_mask,
135
+ output_hidden_states=False,
136
+ return_dict=True,
137
+ )
138
+ return outputs.logits
139
+
140
+
141
@dataclass
class RewardInputs:
    """Static, per-target context consumed by `RewardWrapper`."""

    # Tokenized target-protein sequence fed to the Direction Oracle.
    protein_tokens: torch.Tensor
    # Desired direction d* used in the gated reward (direction - 0.5) * d_star;
    # presumably +1 for agonist and -1 for antagonist — TODO confirm convention.
    d_star: float
    # Raw target-protein string handed to the affinity scoring function.
    protein_seq: str
146
+
147
+
148
class RewardWrapper:
    """Computes the TD3B gated reward R = affinity * sigmoid(d* * (dir - 0.5) / alpha).

    Bundles the affinity scorer (`scoring_fn`), the Direction Oracle, the
    diffusion base model (used only to embed soft token distributions), and
    the tokenizer, plus the per-target `RewardInputs`. Oracle capabilities
    are probed at construction time via `hasattr`, so the wrapper degrades
    gracefully when the oracle lacks hidden-state or probability interfaces.
    """

    def __init__(
        self,
        scoring_fn: Callable,
        direction_oracle: torch.nn.Module,
        base_model,
        tokenizer,
        reward_inputs: RewardInputs,
        device: torch.device,
        fast_direction: bool = False,
        reward_alpha: float = 0.1,
    ):
        self.scoring_fn = scoring_fn
        self.direction_oracle = direction_oracle
        self.base_model = base_model
        self.tokenizer = tokenizer
        self.reward_inputs = reward_inputs
        self.device = device
        self.fast_direction = fast_direction
        self.reward_alpha = reward_alpha
        # Hidden-direction path needs all three oracle submodules.
        self._supports_hidden_direction = all(
            hasattr(direction_oracle, attr)
            for attr in ("protein_embedder", "fusion", "classifier")
        )
        self._supports_predict = hasattr(direction_oracle, "predict_with_confidence")
        # fast_direction skips the transformer pass, which is only meaningful
        # when the oracle exposes the hidden-direction modules.
        if self.fast_direction and not self._supports_hidden_direction:
            logger.warning("fast_direction requested but oracle lacks hidden-direction modules; disabling fast_direction.")
            self.fast_direction = False
        # Protein embedding is target-constant, so it is computed once lazily.
        self._protein_emb_cache = None
        if self.reward_inputs.protein_seq is None:
            raise ValueError("RewardInputs.protein_seq is required for conditioned sampling.")

    def _protein_emb(self, batch_size: int) -> torch.Tensor:
        """Return the cached protein embedding expanded to `batch_size` rows.

        Assumes the embedder output is (1, D) so `expand` broadcasts row 0 —
        TODO confirm embedder output shape.
        """
        if not self._supports_hidden_direction:
            raise RuntimeError("direction_oracle does not support hidden-direction inference.")
        if self._protein_emb_cache is None:
            prot_tokens = _protein_tokens_to_device(self.reward_inputs.protein_tokens, self.device)
            prot_emb = self.direction_oracle.protein_embedder(prot_tokens)
            self._protein_emb_cache = prot_emb
        return self._protein_emb_cache.expand(batch_size, -1)

    def _direction_from_hidden(
        self,
        hidden: torch.Tensor,
        attn_mask: torch.Tensor,
    ) -> torch.Tensor:
        """Direction score from backbone hidden states: masked mean-pool over
        the sequence, fuse with the protein embedding, classify."""
        if not self._supports_hidden_direction:
            raise RuntimeError("direction_oracle does not support hidden-direction inference.")
        mask = attn_mask.to(hidden.dtype).unsqueeze(-1)
        # Mean over valid positions only; clamp avoids 0-division on empty masks.
        pooled = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp_min(1.0)
        protein_emb = self._protein_emb(pooled.size(0))
        fused = self.direction_oracle.fusion(pooled, protein_emb)
        return self.direction_oracle.classifier(fused).squeeze(-1)

    def _direction_from_probs(
        self,
        y_probs: torch.Tensor,
        attn_mask: torch.Tensor,
    ) -> torch.Tensor:
        """Direction score from soft token distributions (keeps gradients).

        Fallback order: oracle-native `predict_from_probs`; hard argmax when
        hidden-direction is unsupported; otherwise embed the soft tokens and
        either use the embeddings directly (`fast_direction`) or run the full
        backbone before pooling.
        """
        if hasattr(self.direction_oracle, "predict_from_probs"):
            prot_tokens = _protein_tokens_to_device(self.reward_inputs.protein_tokens, self.device)
            return self.direction_oracle.predict_from_probs(y_probs, prot_tokens, attn_mask)
        if not self._supports_hidden_direction:
            # No differentiable path available: fall back to discrete tokens.
            token_ids = y_probs.argmax(dim=-1)
            return self._direction_from_tokens(token_ids)
        if self.fast_direction:
            # Fast path: treat the soft embeddings as the "hidden" states,
            # skipping the transformer forward entirely.
            emb_weight = self.base_model.backbone.model.roformer.embeddings.word_embeddings.weight
            inputs_embeds = y_probs @ emb_weight
            hidden = inputs_embeds
        else:
            emb_weight = self.base_model.backbone.model.roformer.embeddings.word_embeddings.weight
            inputs_embeds = y_probs @ emb_weight
            hidden = _roformer_hidden_from_inputs(
                self.base_model,
                inputs_embeds=inputs_embeds,
                attn_mask=attn_mask,
            )
        return self._direction_from_hidden(hidden, attn_mask)

    def _direction_from_tokens(self, token_ids: torch.Tensor) -> torch.Tensor:
        """Direction score from hard token ids via the oracle's discrete API."""
        prot_tokens = _protein_tokens_to_device(self.reward_inputs.protein_tokens, self.device)
        # Broadcast a single protein row across the peptide batch.
        if prot_tokens.dim() == 2 and prot_tokens.size(0) == 1:
            prot_tokens = prot_tokens.expand(token_ids.size(0), -1)
        if self._supports_predict:
            # Confidence is discarded here; only the direction score is used.
            direction, _ = self.direction_oracle.predict_with_confidence(token_ids, prot_tokens)
            return direction
        return self.direction_oracle(token_ids, prot_tokens)

    def _gated_reward(self, affinity: torch.Tensor, direction: torch.Tensor) -> torch.Tensor:
        """Gate the affinity with sigmoid(d* * (direction - 0.5) / alpha)."""
        d_star = torch.as_tensor(self.reward_inputs.d_star, device=self.device, dtype=direction.dtype)
        directional_score = (direction - 0.5) * d_star
        gate = torch.sigmoid(directional_score / self.reward_alpha)
        return affinity * gate

    def evaluate_tokens(self, token_ids: torch.Tensor, attn_mask: torch.Tensor) -> Dict[str, torch.Tensor]:
        """Full evaluation of hard samples.

        Returns a dict with "sequences" (list of decoded strings — not a
        tensor despite the annotation), "affinity", "direction", and
        "gated_reward" tensors.
        """
        sequences = _decode_sequences(self.tokenizer, token_ids)
        affinity = _affinity_from_scoring(
            self.scoring_fn,
            sequences,
            self.device,
            protein_seq=self.reward_inputs.protein_seq,
        )
        with torch.no_grad():
            direction = self._direction_from_tokens(token_ids)
        gated_reward = self._gated_reward(affinity, direction)
        return {
            "sequences": sequences,
            "affinity": affinity,
            "direction": direction,
            "gated_reward": gated_reward,
        }

    def reward_from_tokens(
        self,
        token_ids: torch.Tensor,
        attn_mask: torch.Tensor,
    ) -> torch.Tensor:
        """Gated reward for hard token samples (no gradients through the oracle)."""
        sequences = _decode_sequences(self.tokenizer, token_ids)
        affinity = _affinity_from_scoring(
            self.scoring_fn,
            sequences,
            self.device,
            protein_seq=self.reward_inputs.protein_seq,
        )
        with torch.no_grad():
            direction = self._direction_from_tokens(token_ids)
        return self._gated_reward(affinity, direction)

    def reward_from_probs(
        self,
        y_probs: torch.Tensor,
        token_ids_for_affinity: torch.Tensor,
        attn_mask: torch.Tensor,
    ) -> torch.Tensor:
        """Gated reward for soft samples, differentiable where possible.

        Tries a differentiable affinity via `scoring_fn.forward_from_probs`;
        on any failure falls back to scoring the argmax-decoded sequences
        (non-differentiable affinity, but direction may still carry grads).
        """
        affinity = None
        if hasattr(self.scoring_fn, "forward_from_probs"):
            try:
                affinity = self.scoring_fn.forward_from_probs(
                    y_probs,
                    attn_mask,
                    prot_seq=self.reward_inputs.protein_seq,
                )
            except Exception as exc:
                # Deliberate best-effort: differentiable scoring is optional.
                logger.warning("Differentiable affinity failed; falling back to argmax. Error: %s", exc)
                affinity = None
        if affinity is None:
            sequences = _decode_sequences(self.tokenizer, token_ids_for_affinity)
            affinity = _affinity_from_scoring(
                self.scoring_fn,
                sequences,
                self.device,
                protein_seq=self.reward_inputs.protein_seq,
            )
        direction = self._direction_from_probs(y_probs, attn_mask)
        return self._gated_reward(affinity, direction)
303
+
304
+
305
class PepTuneSampler:
    """PepTune-style MCTS sampler over the masked-diffusion reverse process.

    Builds a search tree whose nodes hold partially unmasked sequences;
    expansion rolls each child out to a full sequence, scores it with the
    TD3B gated reward, and maintains a Pareto front over
    (affinity, direction-score). Final samples are drawn from the front.
    Depends on the project-local `peptide_mcts.Node` / `updateParetoFront`
    and `utils.app.PeptideAnalyzer` APIs (imported lazily in __init__).
    """

    def __init__(
        self,
        base_model,
        reward_fn: RewardWrapper,
        seq_length: int,
        num_steps: int,
        mcts_iterations: int,
        num_children: int,
        sample_prob_weight: float,
        invalid_penalty: float,
        pareto_max_size: Optional[int],
        eps: float,
    ):
        # Lazy imports keep module import light and avoid cycles.
        from peptide_mcts import Node, updateParetoFront
        from utils.app import PeptideAnalyzer

        self.base_model = base_model
        self.reward_fn = reward_fn
        self.seq_length = seq_length
        self.num_steps = num_steps
        self.mcts_iterations = mcts_iterations
        self.num_children = num_children
        self.sample_prob_weight = sample_prob_weight
        self.invalid_penalty = invalid_penalty
        self.pareto_max_size = pareto_max_size
        self.eps = eps

        self.device = base_model.device
        self.mask_idx = base_model.mask_index
        self.tokenizer = base_model.tokenizer
        self.analyzer = PeptideAnalyzer()
        self.Node = Node
        self.updateParetoFront = updateParetoFront

        # Diffusion time grid from t=1 down to t=eps, with constant step dt.
        self.timesteps = torch.linspace(1, eps, num_steps + 1, device=self.device)
        self.dt = torch.as_tensor((1 - eps) / num_steps, device=self.device)
        self.args = SimpleNamespace(
            num_obj=1,
            total_num_steps=num_steps,
            seq_length=seq_length,
            num_children=num_children,
        )

    def _init_root(self):
        """Create the root node: a fully masked sequence at timestep 0."""
        masked_seq = torch.full((self.seq_length,), self.mask_idx, device=self.device, dtype=torch.long)
        attn_mask = torch.ones_like(masked_seq, device=self.device)
        tokens = {"seqs": masked_seq, "attention_mask": attn_mask}
        return self.Node(
            args=self.args,
            tokens=tokens,
            log_rnd=torch.zeros((), device=self.device),
            log_policy_step=torch.zeros((), device=self.device),
            log_pretrained_step=torch.zeros((), device=self.device),
            totalReward=np.zeros(self.args.num_obj),
            timestep=0,
        )

    def _select(self, root):
        """Descend from `root` until selectNode returns a non-3 status.

        Status semantics live in `peptide_mcts.Node`; 3 appears to mean
        "keep descending" — TODO confirm against Node.selectNode.
        """
        node = root
        while True:
            node, status = node.selectNode()
            if status != 3:
                return node, status

    def _update_pareto(self, pareto_front, pareto_tokens, seq, token_ids, score_vector):
        """Insert (seq, score_vector) into the front and keep tokens in sync.

        `pareto_tokens` is pruned to entries still on the front, and the new
        sequence's token tensor is stored (detached clone) if it survived.
        """
        pareto_front = self.updateParetoFront(
            pareto_front,
            seq,
            score_vector,
            totalSize=self.pareto_max_size,
        )
        pareto_tokens = {k: pareto_tokens[k] for k in pareto_front if k in pareto_tokens}
        if seq in pareto_front:
            pareto_tokens[seq] = token_ids.detach().clone()
        return pareto_front, pareto_tokens

    def _expand(self, parent, pareto_front, pareto_tokens):
        """Expand `parent`: sample children, roll out, score, backpropagate.

        Children are one reverse step from the parent; each is then rolled out
        to t=eps (with a final noise-removal pass if masks remain), decoded,
        validity-checked, and scored. Invalid rollouts get -invalid_penalty.
        The mean child reward is backed up to the root.
        """
        parent_tokens = parent.tokens["seqs"].to(self.device)
        attn_mask = parent.tokens["attention_mask"].to(self.device)
        t = self.timesteps[parent.timestep] * torch.ones(1, 1, device=self.device)

        with torch.no_grad():
            _, x_children, log_policy_step, log_pretrained_step = self.base_model.batch_mcts_reverse_step(
                token_array=parent_tokens,
                t=t,
                dt=self.dt,
                batch_size=self.num_children,
                pretrained=self.base_model,
            )

        # Radon–Nikodym log-ratio accumulates pretrained-vs-policy mass.
        child_log_rnd = parent.log_rnd + (log_pretrained_step - log_policy_step)
        log_policy_step = log_policy_step * self.sample_prob_weight

        # Roll each child out to the end of the diffusion schedule.
        x_rollout = x_children
        t_step = self.timesteps[parent.timestep] * torch.ones(self.num_children, 1, device=self.device)
        for i in range(1, self.num_steps - parent.timestep):
            t_step = self.timesteps[parent.timestep + i] * torch.ones(self.num_children, 1, device=self.device)
            with torch.no_grad():
                _, x_next, _, _ = self.base_model.mcts_reverse_step(
                    x_rollout,
                    t=t_step,
                    dt=self.dt,
                    pretrained=self.base_model,
                )
            x_rollout = x_next

        # Any residual mask tokens are cleaned up with a dedicated pass.
        if (x_rollout == self.mask_idx).any().item():
            with torch.no_grad():
                _, x_next, _, _ = self.base_model.mcts_noise_removal(
                    x_rollout,
                    t=t_step,
                    dt=self.dt,
                    pretrained=self.base_model,
                )
            x_rollout = x_next

        sequences = self.tokenizer.batch_decode(x_rollout)
        valid_mask = [self.analyzer.is_peptide(seq) for seq in sequences]

        # Default reward: penalty for chemically invalid rollouts.
        reward_values = np.full(self.num_children, -float(self.invalid_penalty), dtype=np.float32)
        if any(valid_mask):
            valid_tokens = x_rollout[valid_mask]
            valid_sequences = [seq for seq, keep in zip(sequences, valid_mask) if keep]
            affinity = _affinity_from_scoring(
                self.reward_fn.scoring_fn,
                valid_sequences,
                self.device,
                protein_seq=self.reward_fn.reward_inputs.protein_seq,
            )
            with torch.no_grad():
                direction = self.reward_fn._direction_from_tokens(valid_tokens)
            gated_reward = self.reward_fn._gated_reward(affinity, direction)
            d_star = self.reward_fn.reward_inputs.d_star
            dir_score = (direction - 0.5) * d_star

            # Pareto front tracks (affinity, directional score) per sequence.
            for idx, seq in enumerate(valid_sequences):
                score_vector = np.array(
                    [float(affinity[idx].item()), float(dir_score[idx].item())],
                    dtype=np.float32,
                )
                pareto_front, pareto_tokens = self._update_pareto(
                    pareto_front,
                    pareto_tokens,
                    seq,
                    valid_tokens[idx],
                    score_vector,
                )

            reward_values[np.array(valid_mask)] = gated_reward.detach().cpu().numpy()

        # Attach children to the tree and collect their (1-dim) reward vectors.
        reward_vectors = []
        for i in range(self.num_children):
            child_tokens = {"seqs": x_children[i].to(dtype=torch.long), "attention_mask": attn_mask}
            reward_vec = np.array([float(reward_values[i])], dtype=np.float32)
            parent.addChildNode(
                tokens=child_tokens,
                log_rnd=child_log_rnd[i],
                log_policy_step=log_policy_step[i],
                log_pretrained_step=log_pretrained_step[i],
                totalReward=reward_vec,
            )
            reward_vectors.append(reward_vec)

        # Back up the mean child reward along the path to the root.
        avg_reward = np.mean(np.stack(reward_vectors, axis=0), axis=0)
        node = parent
        while node:
            node.updateNode(avg_reward)
            node = node.parentNode

        return pareto_front, pareto_tokens

    def _select_from_pareto(self, pareto_front, pareto_tokens, batch_size):
        """Pick `batch_size` token tensors from the Pareto front by gated reward.

        Scores are re-gated in NumPy (sigmoid of the stored directional score
        over reward_alpha) and sorted descending; if the front is smaller than
        the batch, entries are drawn with replacement. An empty front falls
        back to fresh prior samples.
        """
        if not pareto_front:
            return self.base_model.sample_prior(batch_size, self.seq_length).to(self.device)

        seqs = list(pareto_front.keys())
        scores = np.stack([pareto_front[seq] for seq in seqs], axis=0)
        affinity = scores[:, 0]
        dir_score = scores[:, 1]
        gate = 1.0 / (1.0 + np.exp(-dir_score / max(self.reward_fn.reward_alpha, 1e-6)))
        gated = affinity * gate
        order = np.argsort(-gated)

        if len(order) >= batch_size:
            selected = [seqs[i] for i in order[:batch_size]]
        else:
            repeats = np.random.choice(order, size=batch_size, replace=True)
            selected = [seqs[i] for i in repeats]

        tokens = [pareto_tokens[seq] for seq in selected]
        return torch.stack(tokens, dim=0).to(self.device)

    def sample(self, batch_size):
        """Run MCTS for `mcts_iterations` rounds and return `batch_size` samples.

        Status 1 from selection skips expansion — presumably a terminal or
        fully expanded node; TODO confirm against Node.selectNode.
        """
        self.base_model.eval()
        root = self._init_root()
        pareto_front = {}
        pareto_tokens = {}

        for _ in range(self.mcts_iterations):
            leaf, status = self._select(root)
            if status == 1:
                continue
            pareto_front, pareto_tokens = self._expand(leaf, pareto_front, pareto_tokens)

        return self._select_from_pareto(pareto_front, pareto_tokens, batch_size)
511
+
512
+
513
+ def _logits_and_probs_from_tokens(
514
+ base_model,
515
+ token_ids: torch.Tensor,
516
+ attn_mask: torch.Tensor,
517
+ ) -> torch.Tensor:
518
+ logits = _logits_from_inputs(base_model, input_ids=token_ids, attn_mask=attn_mask)
519
+ log_probs = base_model.subs_parameterization(logits, token_ids)
520
+ return log_probs
521
+
522
+
523
def _logits_and_probs_from_one_hot(
    base_model,
    y_one_hot: torch.Tensor,
    token_ids: torch.Tensor,
    attn_mask: torch.Tensor,
) -> torch.Tensor:
    """Differentiable forward pass: relax (one-hot or soft) token vectors into
    embedding space so gradients can flow through the backbone, then return
    the SUBS-parameterized log-probabilities."""
    embedding_matrix = base_model.backbone.model.roformer.embeddings.word_embeddings.weight
    soft_embeds = y_one_hot @ embedding_matrix
    logits = _logits_from_inputs(base_model, inputs_embeds=soft_embeds, attn_mask=attn_mask)
    return base_model.subs_parameterization(logits, token_ids)
534
+
535
+
536
def classifier_guidance(
    base_model,
    reward_fn: RewardWrapper,
    batch_size: int,
    seq_length: int,
    num_steps: int,
    guidance_scale: float,
    eps: float = DEFAULT_EPS,
    guidance_steps: Optional[int] = None,
) -> Dict[str, torch.Tensor]:
    """Classifier-guidance baseline: tilt each reverse step by the reward gradient.

    At guided steps the current tokens are one-hot relaxed, pushed through the
    backbone differentiably, and the gated reward's gradient w.r.t. the one-hot
    input reweights the base transition probabilities. If the reward ever comes
    back without gradients, guidance is disabled for the remainder of sampling.

    Args:
        guidance_scale: multiplier on the reward gradient.
        guidance_steps: if set, only the last `guidance_steps` steps are guided.

    Returns:
        {"tokens": final token-id tensor of shape (batch_size, seq_length)}.
    """
    device = base_model.device
    mask_idx = base_model.mask_index
    vocab_size = base_model.vocab_size
    x = base_model.sample_prior(batch_size, seq_length).to(device)
    attn_mask = torch.ones_like(x, device=device)
    # Time grid 1 -> eps with constant step dt.
    timesteps = torch.linspace(1, eps, num_steps + 1, device=device)
    dt = torch.as_tensor((1 - eps) / num_steps, device=device)

    guidance_enabled = True
    for step in range(num_steps):
        t = timesteps[step].repeat(batch_size)
        use_guidance = guidance_enabled and (guidance_steps is None or step >= num_steps - guidance_steps)
        if not use_guidance:
            # Plain (unguided) reverse step.
            log_probs = _logits_and_probs_from_tokens(base_model, x, attn_mask)
            q_base = _transition_probs_from_logits(log_probs, t, dt, mask_idx)
            x = _sample_from_q(q_base, x, mask_idx)
            continue

        # Differentiable path: gradient flows into the one-hot relaxation.
        y_one_hot = _tokens_to_one_hot(x, vocab_size).to(device)
        y_one_hot.requires_grad_(True)
        token_ids = x.detach()
        log_probs = _logits_and_probs_from_one_hot(base_model, y_one_hot, token_ids, attn_mask)
        y_probs = log_probs.exp()
        token_ids_for_affinity = y_probs.argmax(dim=-1).detach()
        reward = reward_fn.reward_from_probs(y_probs, token_ids_for_affinity, attn_mask)
        if not reward.requires_grad:
            # Reward path turned out non-differentiable: warn once and fall
            # back to unguided sampling for all remaining steps.
            if guidance_enabled:
                logger.warning(
                    "Reward does not require grad; disabling gradient guidance for classifier_guidance."
                )
            guidance_enabled = False
            q_base = _transition_probs_from_logits(log_probs, t, dt, mask_idx)
            x = _sample_from_q(q_base, x, mask_idx)
            continue
        reward.sum().backward()
        grad = y_one_hot.grad
        q_base = _transition_probs_from_logits(log_probs, t, dt, mask_idx)
        # Center the gradient on the mask column so "stay masked" is neutral,
        # then clamp to keep exp() finite.
        guidance = guidance_scale * (grad - grad[:, :, mask_idx].unsqueeze(-1))
        guidance = guidance.clamp(min=-50.0, max=50.0)
        q_guided = q_base * torch.exp(guidance)
        q_guided = _normalize_probs(q_guided)
        x = _sample_from_q(q_guided, x, mask_idx)

    return {"tokens": x}
590
+
591
+
592
def unguided_sampling(
    base_model,
    batch_size: int,
    seq_length: int,
    num_steps: int,
    eps: float = DEFAULT_EPS,
) -> Dict[str, torch.Tensor]:
    """Sample sequences from the pre-trained diffusion model with no guidance.

    Runs the plain reverse process from t=1 down to t=eps and returns the
    final token ids as {"tokens": tensor of shape (batch_size, seq_length)}.
    """
    device = base_model.device
    mask_token = base_model.mask_index
    tokens = base_model.sample_prior(batch_size, seq_length).to(device)
    attention_mask = torch.ones_like(tokens, device=device)
    schedule = torch.linspace(1, eps, num_steps + 1, device=device)
    step_size = torch.as_tensor((1 - eps) / num_steps, device=device)

    for idx in range(num_steps):
        t_batch = schedule[idx].repeat(batch_size)
        log_probs = _logits_and_probs_from_tokens(base_model, tokens, attention_mask)
        transition = _transition_probs_from_logits(log_probs, t_batch, step_size, mask_token)
        tokens = _sample_from_q(transition, tokens, mask_token)

    return {"tokens": tokens}
613
+
614
+
615
def sequential_monte_carlo(
    base_model,
    reward_fn: RewardWrapper,
    batch_size: int,
    seq_length: int,
    num_steps: int,
    alpha: float,
    eps: float = DEFAULT_EPS,
) -> Dict[str, torch.Tensor]:
    """SMC baseline: unguided reverse steps with reward-based resampling.

    Each particle takes a plain reverse step; particles are then resampled in
    proportion to exp((r_next - r_current) / alpha), so `alpha` acts as a
    temperature (smaller alpha -> greedier selection).

    Returns:
        {"tokens": final token-id tensor of shape (batch_size, seq_length)}.
    """
    device = base_model.device
    mask_idx = base_model.mask_index
    x = base_model.sample_prior(batch_size, seq_length).to(device)
    attn_mask = torch.ones_like(x, device=device)
    timesteps = torch.linspace(1, eps, num_steps + 1, device=device)
    dt = torch.as_tensor((1 - eps) / num_steps, device=device)

    with torch.no_grad():
        r_current = reward_fn.reward_from_tokens(x, attn_mask).detach()
    for step in range(num_steps):
        t = timesteps[step].repeat(batch_size)
        log_probs = _logits_and_probs_from_tokens(base_model, x, attn_mask)
        q_base = _transition_probs_from_logits(log_probs, t, dt, mask_idx)
        x_next = _sample_from_q(q_base, x, mask_idx)

        with torch.no_grad():
            r_next = reward_fn.reward_from_tokens(x_next, attn_mask).detach()
        # Incremental importance weights; clamp before normalizing to avoid inf.
        weights = torch.exp((r_next - r_current) / alpha).clamp_max(1e6)
        weights = _safe_resample_weights(weights)
        # Multinomial resampling with replacement; rewards follow particles.
        indices = torch.multinomial(weights, num_samples=batch_size, replacement=True)
        x = x_next[indices]
        r_current = r_next[indices]

    return {"tokens": x}
648
+
649
+
650
def twisted_diffusion_sampler(
    base_model,
    reward_fn: RewardWrapper,
    batch_size: int,
    seq_length: int,
    num_steps: int,
    guidance_scale: float,
    alpha: float,
    eps: float = DEFAULT_EPS,
    guidance_steps: Optional[int] = None,
) -> Dict[str, torch.Tensor]:
    """TDS baseline: gradient-twisted proposals plus SMC importance correction.

    Like `classifier_guidance`, the reward gradient tilts the transition
    probabilities; unlike it, each step also resamples particles with weights
    exp((r_next - r_current)/alpha + (log p_base - log p_guided)), correcting
    for the mismatch between the twisted proposal and the base kernel.
    Guidance is disabled permanently if the reward carries no gradients.

    Returns:
        {"tokens": final token-id tensor of shape (batch_size, seq_length)}.
    """
    device = base_model.device
    mask_idx = base_model.mask_index
    vocab_size = base_model.vocab_size
    x = base_model.sample_prior(batch_size, seq_length).to(device)
    attn_mask = torch.ones_like(x, device=device)
    timesteps = torch.linspace(1, eps, num_steps + 1, device=device)
    dt = torch.as_tensor((1 - eps) / num_steps, device=device)

    with torch.no_grad():
        r_current = reward_fn.reward_from_tokens(x, attn_mask).detach()
    guidance_enabled = True
    for step in range(num_steps):
        t = timesteps[step].repeat(batch_size)
        use_guidance = guidance_enabled and (guidance_steps is None or step >= num_steps - guidance_steps)

        if use_guidance:
            # Differentiable one-hot relaxation for the reward gradient.
            y_one_hot = _tokens_to_one_hot(x, vocab_size).to(device)
            y_one_hot.requires_grad_(True)
            token_ids = x.detach()
            log_probs = _logits_and_probs_from_one_hot(base_model, y_one_hot, token_ids, attn_mask)
            y_probs = log_probs.exp()
            token_ids_for_affinity = y_probs.argmax(dim=-1).detach()
            reward = reward_fn.reward_from_probs(y_probs, token_ids_for_affinity, attn_mask)
            q_base = _transition_probs_from_logits(log_probs, t, dt, mask_idx)
            if not reward.requires_grad:
                # Non-differentiable reward: warn once, then run untwisted.
                if guidance_enabled:
                    logger.warning(
                        "Reward does not require grad; disabling gradient guidance for twisted_diffusion_sampler."
                    )
                guidance_enabled = False
                q_guided = q_base
            else:
                reward.sum().backward()
                grad = y_one_hot.grad
                # Center on the mask column and clamp to keep exp() finite.
                guidance = guidance_scale * (grad - grad[:, :, mask_idx].unsqueeze(-1))
                guidance = guidance.clamp(min=-50.0, max=50.0)
                q_guided = q_base * torch.exp(guidance)
                q_guided = _normalize_probs(q_guided)
        else:
            log_probs = _logits_and_probs_from_tokens(base_model, x, attn_mask)
            q_base = _transition_probs_from_logits(log_probs, t, dt, mask_idx)
            q_guided = q_base

        x_next = _sample_from_q(q_guided, x, mask_idx)
        with torch.no_grad():
            r_next = reward_fn.reward_from_tokens(x_next, attn_mask).detach()

        # Importance correction: reward increment plus proposal/base log-ratio.
        logp_guided = _sequence_logprob(q_guided, x_next, x, mask_idx)
        logp_base = _sequence_logprob(q_base, x_next, x, mask_idx)
        weights = torch.exp((r_next - r_current) / alpha + (logp_base - logp_guided)).clamp_max(1e6)
        weights = _safe_resample_weights(weights)
        indices = torch.multinomial(weights, num_samples=batch_size, replacement=True)
        x = x_next[indices]
        r_current = r_next[indices]

    return {"tokens": x}
717
+
718
+
719
def peptune_mctg_sampling(
    base_model,
    reward_fn: RewardWrapper,
    batch_size: int,
    seq_length: int,
    num_steps: int,
    mcts_iterations: int,
    num_children: int,
    alpha: float,
    sample_prob_weight: float,
    invalid_penalty: float = 1.0,
    pareto_max_size: Optional[int] = None,
    eps: float = DEFAULT_EPS,
) -> Dict[str, torch.Tensor]:
    """PepTune baseline entry point: configure a `PepTuneSampler` and sample.

    Note: `alpha` is accepted for signature parity with the other baselines
    but is not consumed by the sampler itself.

    Returns:
        {"tokens": token-id tensor of shape (batch_size, seq_length)}.
    """
    sampler_config = dict(
        base_model=base_model,
        reward_fn=reward_fn,
        seq_length=seq_length,
        num_steps=num_steps,
        mcts_iterations=mcts_iterations,
        num_children=num_children,
        sample_prob_weight=sample_prob_weight,
        invalid_penalty=invalid_penalty,
        pareto_max_size=pareto_max_size,
        eps=eps,
    )
    mcts_sampler = PepTuneSampler(**sampler_config)
    return {"tokens": mcts_sampler.sample(batch_size=batch_size)}
baselines/run.sh ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Launch baseline peptide sampling (single- or multi-GPU) via
# sampling_setup.py, then (multi-GPU only) merge per-rank CSV shards.
set -euo pipefail

# Resolve this script's directory and the repository root relative to it,
# so the script works regardless of the caller's CWD.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"

# Positional arguments (all optional, with defaults):
#   $1 targets CSV, $2 baseline name, $3 device string, $4 output dir,
#   $5 number of GPUs, $6 torch.distributed master port.
CSV_PATH="${1:-To Be Added}"
BASELINE="${2:-unguided}"
DEVICE="${3:-cuda:4}"
OUTPUT_DIR="${4:-${SCRIPT_DIR}/outputs}"
NGPUS="${5:-1}"
MASTER_PORT="${6:-29500}"

if [ "$NGPUS" -gt 1 ]; then
    echo "Running multi-GPU inference with $NGPUS GPUs (master port: $MASTER_PORT)"
    # In distributed mode each rank selects its own CUDA device, so pass the
    # generic "cuda" string instead of a fixed index like cuda:4.
    LAUNCH_DEVICE="cuda"
    python -m torch.distributed.run \
        --nproc_per_node="$NGPUS" \
        --master_port="$MASTER_PORT" \
        "${SCRIPT_DIR}/sampling_setup.py" \
        --ckpt_path "${ROOT_DIR}/pretrained/peptune-pretrained.ckpt" \
        --device "${LAUNCH_DEVICE}" \
        --baseline "${BASELINE}" \
        --targets_csv "${CSV_PATH}" \
        --batch_size 8 \
        --num_steps 128 \
        --num_batches 1 \
        --output_dir "${OUTPUT_DIR}"

    # Each rank writes *_rank*.csv shards; merge them into single CSVs.
    # The quoted 'PY' delimiter prevents shell expansion inside the heredoc,
    # so values are passed to Python via environment variables instead.
    export OUTPUT_DIR BASELINE
    python - <<'PY'
import glob
import os
import pandas as pd

out_dir = os.environ["OUTPUT_DIR"]
baseline = os.environ["BASELINE"]

def merge(pattern, output_name):
    files = sorted(glob.glob(os.path.join(out_dir, pattern)))
    if not files:
        return
    dfs = []
    for path in files:
        try:
            dfs.append(pd.read_csv(path))
        except Exception as exc:
            print(f"[merge] skip {path}: {exc}")
    if not dfs:
        return
    merged = pd.concat(dfs, ignore_index=True)
    merged.to_csv(os.path.join(out_dir, output_name), index=False)
    print(f"[merge] wrote {output_name} from {len(files)} shards")

merge(f"{baseline}_samples_rank*.csv", f"{baseline}_samples.csv")
merge("batch_times_rank*.csv", "batch_times.csv")
merge(f"{baseline}_metrics_rank*.csv", f"{baseline}_metrics.csv")
PY
    exit 0
fi

# Single-device path: run sampling directly on the requested device.
python "${SCRIPT_DIR}/sampling_setup.py" \
    --ckpt_path "${ROOT_DIR}/pretrained/peptune-pretrained.ckpt" \
    --device "${DEVICE}" \
    --baseline "${BASELINE}" \
    --targets_csv "${CSV_PATH}" \
    --batch_size 8 \
    --num_steps 128 \
    --num_batches 1 \
    --output_dir "${OUTPUT_DIR}"

# Usage examples (paths to be filled in):
# ./run.sh To Be Added peptune cuda:0 To Be Added
# ./run.sh To Be Added peptune cuda To Be Added 4 29501
# ./run.sh To Be Added tds cuda:1 To Be Added
# ./run.sh To Be Added smc cuda:2 To Be Added
# ./run.sh To Be Added cg cuda:3 To Be Added
# ./run.sh To Be Added unguided cuda:4 To Be Added
baselines/run_mcts_tr2d2.py ADDED
@@ -0,0 +1,421 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import os
4
+ import sys
5
+ from types import SimpleNamespace
6
+ from typing import Any, Dict, List, Tuple
7
+
8
+ import numpy as np
9
+ import pandas as pd
10
+ import torch
11
+ import torch.distributed as dist
12
+
13
+ ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
14
+ if ROOT_DIR not in sys.path:
15
+ sys.path.insert(0, ROOT_DIR)
16
+
17
+ from diffusion import Diffusion
18
+ from configs.finetune_config import (
19
+ DiffusionConfig,
20
+ RoFormerConfig,
21
+ NoiseConfig,
22
+ TrainingConfig,
23
+ SamplingConfig,
24
+ EvalConfig,
25
+ OptimConfig,
26
+ MCTSConfig,
27
+ )
28
+ from finetune_utils import load_tokenizer
29
+ from finetune_distributed_utils import setup_distributed, cleanup_distributed, is_main_process
30
+ from scoring.functions.binding import MultiTargetBindingAffinity, TargetSpecificBindingAffinity
31
+ from td3b.direction_oracle import DirectionalOracle
32
+ from finetune_multi_target_tr2d2_ddp import TR2D2GatedReward, TargetDataset, create_tr2d2_mcts
33
+ from utils.app import PeptideAnalyzer
34
+
35
+
36
+ def _load_checkpoint(ckpt_path: str, device: torch.device) -> Dict[str, Any]:
37
+ ckpt = torch.load(ckpt_path, map_location=device, weights_only=False)
38
+ if not isinstance(ckpt, dict):
39
+ raise ValueError(f"Unsupported checkpoint format: {type(ckpt)}")
40
+ return ckpt
41
+
42
+
43
+ def _extract_state_and_config(ckpt: Dict[str, Any]) -> Dict[str, Any]:
44
+ state_dict = ckpt.get("model_state_dict") or ckpt.get("state_dict") or ckpt
45
+ config = ckpt.get("config") or {}
46
+ return {"state_dict": state_dict, "config": config}
47
+
48
+
49
+ def _build_args(cfg: Dict[str, Any], cli: argparse.Namespace) -> argparse.Namespace:
50
+ defaults = {
51
+ "base_path": "To Be Added",
52
+ "seq_length": 200,
53
+ "sampling_eps": 1e-3,
54
+ "total_num_steps": 128,
55
+ "alpha": 0.1,
56
+ "hidden_dim": 768,
57
+ "num_layers": 8,
58
+ "num_heads": 8,
59
+ "min_affinity_threshold": 0.0,
60
+ "sigmoid_temperature": 0.1,
61
+ "val_samples_per_target": 8,
62
+ "direction_oracle_esm_name": "facebook/esm2_t33_650M_UR50D",
63
+ "direction_oracle_esm_cache_dir": None,
64
+ "direction_oracle_esm_local_files_only": False,
65
+ "direction_oracle_max_ligand_length": 768,
66
+ "direction_oracle_max_protein_length": 1024,
67
+ "direction_oracle_d_model": 256,
68
+ "direction_oracle_n_heads": 4,
69
+ "direction_oracle_n_self_attn_layers": 1,
70
+ "direction_oracle_n_bmca_layers": 2,
71
+ "direction_oracle_dropout": 0.3,
72
+ "num_iter": 20,
73
+ "num_children": 24,
74
+ "buffer_size": 32,
75
+ "exploration": 1.0,
76
+ }
77
+
78
+ merged = dict(defaults)
79
+ merged.update(cfg or {})
80
+
81
+ if cli.base_path is not None:
82
+ merged["base_path"] = cli.base_path
83
+ if cli.val_csv is not None:
84
+ merged["val_csv"] = cli.val_csv
85
+ if cli.save_path is not None:
86
+ merged["save_path"] = cli.save_path
87
+ if cli.device is not None:
88
+ merged["device"] = cli.device
89
+ if cli.val_samples_per_target is not None:
90
+ merged["val_samples_per_target"] = cli.val_samples_per_target
91
+ if cli.seq_length is not None:
92
+ merged["seq_length"] = cli.seq_length
93
+ if cli.total_num_steps is not None:
94
+ merged["total_num_steps"] = cli.total_num_steps
95
+ if cli.sampling_eps is not None:
96
+ merged["sampling_eps"] = cli.sampling_eps
97
+ if cli.alpha is not None:
98
+ merged["alpha"] = cli.alpha
99
+ if cli.num_iter is not None:
100
+ merged["num_iter"] = cli.num_iter
101
+ if cli.num_children is not None:
102
+ merged["num_children"] = cli.num_children
103
+ if cli.buffer_size is not None:
104
+ merged["buffer_size"] = cli.buffer_size
105
+ if cli.exploration is not None:
106
+ merged["exploration"] = cli.exploration
107
+ if cli.max_sequence_length is not None:
108
+ merged["max_sequence_length"] = cli.max_sequence_length
109
+
110
+ args = SimpleNamespace(**merged)
111
+
112
+ base_tr2d2_path = os.path.join(args.base_path, "tr2d2-pep")
113
+ if not getattr(args, "direction_oracle_ckpt", None):
114
+ args.direction_oracle_ckpt = os.path.join(base_tr2d2_path, "direction_oracle.pt")
115
+ if not getattr(args, "direction_oracle_tr2d2_checkpoint", None):
116
+ args.direction_oracle_tr2d2_checkpoint = os.path.join(
117
+ base_tr2d2_path, "pretrained", "peptune-pretrained.ckpt"
118
+ )
119
+ if not getattr(args, "direction_oracle_tokenizer_vocab", None):
120
+ args.direction_oracle_tokenizer_vocab = os.path.join(
121
+ base_tr2d2_path, "tokenizer", "new_vocab.txt"
122
+ )
123
+ if not getattr(args, "direction_oracle_tokenizer_splits", None):
124
+ args.direction_oracle_tokenizer_splits = os.path.join(
125
+ base_tr2d2_path, "tokenizer", "new_splits.txt"
126
+ )
127
+
128
+ if not getattr(args, "save_path", None):
129
+ args.save_path = os.path.join(base_tr2d2_path, "baselines", "outputs_mcts_tr2d2")
130
+ os.makedirs(args.save_path, exist_ok=True)
131
+ return args
132
+
133
+
134
def _build_model(args: argparse.Namespace, state_dict: Dict[str, Any], device: torch.device) -> Diffusion:
    """Instantiate the Diffusion policy model and load checkpoint weights.

    Loading is non-strict; mismatched key counts are reported but tolerated.
    The model is returned in eval mode on ``device``.
    """
    diffusion_cfg = DiffusionConfig(
        roformer=RoFormerConfig(
            hidden_size=args.hidden_dim,
            n_layers=args.num_layers,
            n_heads=args.num_heads,
        ),
        noise=NoiseConfig(),
        training=TrainingConfig(sampling_eps=args.sampling_eps),
        sampling=SamplingConfig(
            steps=args.total_num_steps,
            sampling_eps=args.sampling_eps,
        ),
        eval_cfg=EvalConfig(),
        optim=OptimConfig(lr=getattr(args, "learning_rate", 3e-4)),
        mcts=MCTSConfig(),
    )

    tok = load_tokenizer(args.base_path)
    net = Diffusion(
        config=diffusion_cfg,
        tokenizer=tok,
        device=device,
    ).to(device)
    result = net.load_state_dict(state_dict, strict=False)
    # Non-strict load: surface how many keys did not line up, if any.
    for label, keys in (("Missing", result.missing_keys), ("Unexpected", result.unexpected_keys)):
        if keys:
            print(f"[load] {label} keys: {len(keys)}")
    net.eval()
    return net
165
+
166
+
167
def _build_oracle(args: argparse.Namespace, device: torch.device) -> DirectionalOracle:
    """Construct the DirectionalOracle from args settings, in eval mode."""
    oracle_kwargs = {
        "model_ckpt": args.direction_oracle_ckpt,
        "tr2d2_checkpoint": args.direction_oracle_tr2d2_checkpoint,
        "tokenizer_vocab": args.direction_oracle_tokenizer_vocab,
        "tokenizer_splits": args.direction_oracle_tokenizer_splits,
        "esm_name": args.direction_oracle_esm_name,
        "d_model": args.direction_oracle_d_model,
        "n_heads": args.direction_oracle_n_heads,
        "n_self_attn_layers": args.direction_oracle_n_self_attn_layers,
        "n_bmca_layers": args.direction_oracle_n_bmca_layers,
        "dropout": args.direction_oracle_dropout,
        "max_ligand_length": args.direction_oracle_max_ligand_length,
        "max_protein_length": args.direction_oracle_max_protein_length,
        "device": device,
        "esm_cache_dir": args.direction_oracle_esm_cache_dir,
        "esm_local_files_only": args.direction_oracle_esm_local_files_only,
    }
    direction_oracle = DirectionalOracle(**oracle_kwargs)
    direction_oracle.eval()
    return direction_oracle
187
+
188
+
189
+ def _compute_direction_accuracy(directions: np.ndarray, d_star: float) -> np.ndarray:
190
+ if directions.size == 0:
191
+ return directions
192
+ acc = np.full(directions.shape, np.nan, dtype=np.float32)
193
+ valid = np.isfinite(directions)
194
+ if not valid.any():
195
+ return acc
196
+ if d_star > 0:
197
+ acc[valid] = (directions[valid] >= 0.5).astype(np.float32)
198
+ else:
199
+ acc[valid] = (directions[valid] < 0.5).astype(np.float32)
200
+ return acc
201
+
202
+
203
+ def _nanmean(values: np.ndarray) -> float:
204
+ if values.size == 0:
205
+ return 0.0
206
+ finite = values[np.isfinite(values)]
207
+ return float(np.mean(finite)) if finite.size else 0.0
208
+
209
+
210
+ def _nanstd(values: np.ndarray) -> float:
211
+ if values.size == 0:
212
+ return 0.0
213
+ finite = values[np.isfinite(values)]
214
+ return float(np.std(finite)) if finite.size else 0.0
215
+
216
+
217
def main() -> None:
    """Evaluate a fine-tuned TR2-D2 checkpoint with MCTS-guided generation.

    For every target protein in the validation CSV and for both directions
    (agonist d*=+1, antagonist d*=-1), runs MCTS sampling, scores the
    collected sequences with the gated reward, and writes one CSV of
    per-sequence records. Supports torchrun-style multi-GPU sharding via
    LOCAL_RANK/WORLD_SIZE environment variables.
    """
    parser = argparse.ArgumentParser(description="MCTS-based TR2-D2 evaluation.")
    parser.add_argument("--ckpt_path", required=True, help="Path to finetuned checkpoint (.ckpt)")
    parser.add_argument("--val_csv", required=True, help="Validation CSV path")
    parser.add_argument("--device", default="cuda", help="Device string (e.g., cuda:0 or cpu)")
    parser.add_argument("--base_path", default=None, help="Base path for TR2-D2")
    parser.add_argument("--save_path", default=None, help="Output directory for evaluation CSV")
    parser.add_argument("--epoch", type=int, default=0, help="Epoch number to label outputs")
    parser.add_argument("--val_samples_per_target", type=int, default=None, help="Samples per target (unused by MCTS)")
    parser.add_argument("--seq_length", type=int, default=None, help="Fallback sequence length")
    parser.add_argument("--total_num_steps", type=int, default=None, help="Diffusion steps")
    parser.add_argument("--sampling_eps", type=float, default=None, help="Sampling epsilon")
    parser.add_argument("--alpha", type=float, default=None, help="MCTS alpha temperature")
    parser.add_argument("--num_iter", type=int, default=None, help="MCTS iterations")
    parser.add_argument("--num_children", type=int, default=None, help="MCTS children per expand")
    parser.add_argument("--buffer_size", type=int, default=None, help="MCTS buffer size")
    parser.add_argument("--exploration", type=float, default=None, help="MCTS exploration constant")
    parser.add_argument("--max_sequence_length", type=int, default=1035)
    parser.add_argument("--max_attempts", type=int, default=3, help="Max MCTS attempts to reach target count")
    parser.add_argument("--seed", type=int, default=None, help="Random seed")
    cli_args = parser.parse_args()

    # Distributed context comes from torchrun's environment variables.
    rank = int(os.environ.get("LOCAL_RANK", 0))
    world_size = int(os.environ.get("WORLD_SIZE", 1))

    if world_size > 1:
        setup_distributed(rank, world_size)
        device = torch.device(f"cuda:{rank}" if torch.cuda.is_available() else "cpu")
    else:
        device = torch.device(cli_args.device)

    # Offset the seed by rank so each shard samples differently but reproducibly.
    if cli_args.seed is not None:
        torch.manual_seed(cli_args.seed + rank)
        np.random.seed(cli_args.seed + rank)

    ckpt = _load_checkpoint(cli_args.ckpt_path, device)
    payload = _extract_state_and_config(ckpt)
    args = _build_args(payload["config"], cli_args)

    tokenizer = load_tokenizer(args.base_path)
    val_dataset = TargetDataset(args.val_csv, tokenizer=tokenizer)

    policy_model = _build_model(args, payload["state_dict"], device)

    # Binding-affinity scorer shares the policy model's backbone for embeddings.
    multi_target_affinity = MultiTargetBindingAffinity(
        tokenizer=tokenizer,
        base_path=args.base_path,
        device=device,
        emb_model=policy_model.backbone,
    )

    directional_oracle = _build_oracle(args, device)
    analyzer = PeptideAnalyzer()

    # Strided sharding: rank r processes targets r, r+W, r+2W, ...
    val_targets = val_dataset.get_all_targets()
    if world_size > 1:
        my_targets = val_targets[rank::world_size]
    else:
        my_targets = val_targets

    records: List[Dict[str, Any]] = []
    # Cache encoded target proteins — the same protein is reused for both directions.
    protein_token_cache: Dict[str, torch.Tensor] = {}

    with torch.no_grad():
        for target_seq in my_targets:
            target_tokens = protein_token_cache.get(target_seq)
            if target_tokens is None:
                target_tokens = directional_oracle.encode_protein(target_seq)
                protein_token_cache[target_seq] = target_tokens

            for direction_name, d_star in [("agonist", 1.0), ("antagonist", -1.0)]:
                target_length = val_dataset.get_sequence_length(target_seq, direction_name)
                if target_length > args.max_sequence_length:
                    target_length = args.max_sequence_length

                # Temporarily set the per-target sequence length; restored below.
                original_seq_length = args.seq_length
                args.seq_length = int(target_length)

                target_affinity = TargetSpecificBindingAffinity(multi_target_affinity, target_seq)
                reward_model = TR2D2GatedReward(
                    affinity_predictor=target_affinity,
                    directional_oracle=directional_oracle,
                    target_direction=d_star,
                    target_protein_tokens=target_tokens,
                    tokenizer=tokenizer,
                    device=device,
                    min_affinity_threshold=args.min_affinity_threshold,
                    temperature=args.sigmoid_temperature,
                )

                mcts = create_tr2d2_mcts(
                    args=args,
                    policy_model=policy_model,
                    reward_function=reward_model,
                    buffer_size=args.buffer_size,
                )

                target_count = int(args.val_samples_per_target)
                collected_sequences: List[str] = []
                attempt_valid_fractions: List[float] = []

                # Retry MCTS (up to max_attempts) until enough sequences collected;
                # a failed attempt is logged and contributes an empty batch.
                for attempt in range(max(cli_args.max_attempts, 1)):
                    try:
                        _, _, _, _, sequences = mcts.forward(resetTree=True)
                    except Exception as exc:
                        print(f"[mcts] failed for target={target_seq[:12]} dir={direction_name}: {exc}")
                        sequences = []

                    attempt_valid = float(np.mean(mcts.valid_fraction_log)) if getattr(mcts, "valid_fraction_log", None) else 0.0
                    attempt_valid_fractions.append(attempt_valid)

                    if sequences:
                        collected_sequences.extend(sequences)

                    if len(collected_sequences) >= target_count:
                        break

                args.seq_length = original_seq_length

                # Run-level validity: mean of per-attempt valid fractions.
                valid_fraction = _nanmean(np.asarray(attempt_valid_fractions, dtype=np.float32))

                # Emit a placeholder record when generation produced nothing,
                # so every (target, direction) pair appears in the output.
                if not collected_sequences:
                    records.append(
                        {
                            "target": target_seq[:20],
                            "sequence": "",
                            "target_direction": d_star,
                            "is_valid": False,
                            "valid_fraction": valid_fraction,
                            "affinity": np.nan,
                            "gated_reward": np.nan,
                            "direction_oracle": np.nan,
                            "consistency_reward": np.nan,
                            "direction_accuracy": np.nan,
                            "success_rate": np.nan,
                        }
                    )
                    continue

                if len(collected_sequences) > target_count:
                    collected_sequences = collected_sequences[:target_count]

                # NOTE(review): `confidences` is computed but not recorded.
                gated_rewards, affinities, confidences, directions = reward_model.reward_fn.compute_gated_reward(collected_sequences)
                direction_accuracy = _compute_direction_accuracy(directions, d_star)
                consistency = d_star * (directions - 0.5)
                # Per-sample accuracy scaled by the run-level valid fraction.
                success_rate = direction_accuracy * valid_fraction

                valid_mask = np.array([analyzer.is_peptide(seq) for seq in collected_sequences], dtype=bool)

                for idx, seq in enumerate(collected_sequences):
                    records.append(
                        {
                            "target": target_seq[:20],
                            "sequence": seq,
                            "target_direction": d_star,
                            "is_valid": bool(valid_mask[idx]) if valid_mask.size else False,
                            "valid_fraction": valid_fraction,
                            "affinity": float(affinities[idx]) if len(affinities) else np.nan,
                            "gated_reward": float(gated_rewards[idx]) if len(gated_rewards) else np.nan,
                            "direction_oracle": float(directions[idx]) if len(directions) else np.nan,
                            "consistency_reward": float(consistency[idx]) if len(consistency) else np.nan,
                            "direction_accuracy": float(direction_accuracy[idx]) if len(direction_accuracy) else np.nan,
                            "success_rate": float(success_rate[idx]) if len(success_rate) else np.nan,
                        }
                    )

    # Gather every rank's records onto the main process; non-main ranks exit here.
    if world_size > 1:
        gathered: List[List[Dict[str, Any]]] = [None for _ in range(world_size)]
        dist.all_gather_object(gathered, records)
        if is_main_process():
            records = [item for sub in gathered for item in sub]
        else:
            cleanup_distributed()
            return

    if is_main_process():
        df = pd.DataFrame(records)
        output_path = os.path.join(args.save_path, f"mcts_validation_epoch_{cli_args.epoch}.csv")
        df.to_csv(output_path, index=False)
        print(f"MCTS validation sequences saved to {output_path}")

        affinities = df["affinity"].to_numpy(dtype=np.float32)
        gated_rewards = df["gated_reward"].to_numpy(dtype=np.float32)
        directions = df["direction_oracle"].to_numpy(dtype=np.float32)
        target_directions = df["target_direction"].to_numpy(dtype=np.float32)
        direction_correct = df["direction_accuracy"].to_numpy(dtype=np.float32)
        valid_fractions = df["valid_fraction"].to_numpy(dtype=np.float32)

        pos_mask = target_directions == 1.0
        neg_mask = target_directions == -1.0

        print("MCTS validation summary")
        print(f" Affinity (d*=1): {_nanmean(affinities[pos_mask]):.4f} ± {_nanstd(affinities[pos_mask]):.4f}")
        print(f" Affinity (d*=-1): {_nanmean(affinities[neg_mask]):.4f} ± {_nanstd(affinities[neg_mask]):.4f}")
        print(f" Direction Accuracy (d*=1): {_nanmean(direction_correct[pos_mask]):.4f} ± {_nanstd(direction_correct[pos_mask]):.4f}")
        print(f" Direction Accuracy (d*=-1): {_nanmean(direction_correct[neg_mask]):.4f} ± {_nanstd(direction_correct[neg_mask]):.4f}")
        print(f" Gated Reward (overall): {_nanmean(gated_rewards):.4f} ± {_nanstd(gated_rewards):.4f}")
        print(f" Valid Fraction: {_nanmean(valid_fractions):.4f} ± {_nanstd(valid_fractions):.4f}")

    if world_size > 1:
        cleanup_distributed()


if __name__ == "__main__":
    main()
baselines/run_validation_td3b.py ADDED
@@ -0,0 +1,548 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import os
4
+ import sys
5
+ from types import SimpleNamespace
6
+ from typing import Any, Dict, List, Tuple
7
+
8
+ import numpy as np
9
+ import pandas as pd
10
+ import torch
11
+ import torch.distributed as dist
12
+
13
+ ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
14
+ if ROOT_DIR not in sys.path:
15
+ sys.path.insert(0, ROOT_DIR)
16
+
17
+ from diffusion import Diffusion
18
+ from configs.finetune_config import (
19
+ DiffusionConfig,
20
+ RoFormerConfig,
21
+ NoiseConfig,
22
+ TrainingConfig,
23
+ SamplingConfig,
24
+ EvalConfig,
25
+ OptimConfig,
26
+ MCTSConfig,
27
+ )
28
+ from finetune_utils import load_tokenizer, create_reward_function
29
+ from finetune_multi_target import TargetDataset
30
+ from distributed_utils import setup_distributed, cleanup_distributed, is_main_process
31
+ from scoring.functions.binding import MultiTargetBindingAffinity, TargetSpecificBindingAffinity
32
+ from td3b.direction_oracle import DirectionalOracle
33
+ from utils.app import PeptideAnalyzer
34
+
35
+
36
+ def _load_checkpoint(ckpt_path: str, device: torch.device) -> Dict[str, Any]:
37
+ ckpt = torch.load(ckpt_path, map_location=device, weights_only=False)
38
+ if not isinstance(ckpt, dict):
39
+ raise ValueError(f"Unsupported checkpoint format: {type(ckpt)}")
40
+ return ckpt
41
+
42
+
43
+ def _extract_state_and_config(ckpt: Dict[str, Any]) -> Dict[str, Any]:
44
+ state_dict = ckpt.get("model_state_dict") or ckpt.get("state_dict") or ckpt
45
+ config = ckpt.get("config") or {}
46
+ return {"state_dict": state_dict, "config": config}
47
+
48
+
49
+ def _build_args(cfg: Dict[str, Any], cli: argparse.Namespace) -> argparse.Namespace:
50
+ defaults = {
51
+ "base_path": "To Be Added",
52
+ "seq_length": 200,
53
+ "sampling_eps": 1e-3,
54
+ "total_num_steps": 128,
55
+ "alpha": 0.1,
56
+ "hidden_dim": 768,
57
+ "num_layers": 8,
58
+ "num_heads": 8,
59
+ "min_affinity_threshold": 0.0,
60
+ "sigmoid_temperature": 0.1,
61
+ "val_samples_per_target": 8,
62
+ "direction_oracle_esm_name": "facebook/esm2_t33_650M_UR50D",
63
+ "direction_oracle_esm_cache_dir": None,
64
+ "direction_oracle_esm_local_files_only": False,
65
+ "direction_oracle_max_ligand_length": 768,
66
+ "direction_oracle_max_protein_length": 1024,
67
+ "direction_oracle_d_model": 256,
68
+ "direction_oracle_n_heads": 4,
69
+ "direction_oracle_n_self_attn_layers": 1,
70
+ "direction_oracle_n_bmca_layers": 2,
71
+ "direction_oracle_dropout": 0.3,
72
+ }
73
+
74
+ merged = dict(defaults)
75
+ merged.update(cfg or {})
76
+
77
+ if cli.base_path is not None:
78
+ merged["base_path"] = cli.base_path
79
+ if cli.val_csv is not None:
80
+ merged["val_csv"] = cli.val_csv
81
+ if cli.save_path is not None:
82
+ merged["save_path"] = cli.save_path
83
+ if cli.device is not None:
84
+ merged["device"] = cli.device
85
+ if cli.val_samples_per_target is not None:
86
+ merged["val_samples_per_target"] = cli.val_samples_per_target
87
+ if getattr(cli, "num_pool", None) is not None:
88
+ merged["num_pool"] = cli.num_pool
89
+ if cli.seq_length is not None:
90
+ merged["seq_length"] = cli.seq_length
91
+ if cli.total_num_steps is not None:
92
+ merged["total_num_steps"] = cli.total_num_steps
93
+ if cli.sampling_eps is not None:
94
+ merged["sampling_eps"] = cli.sampling_eps
95
+ if cli.seed is not None:
96
+ merged["seed"] = cli.seed
97
+
98
+ args = SimpleNamespace(**merged)
99
+
100
+ base_tr2d2_path = os.path.join(args.base_path, "tr2d2-pep")
101
+ if not getattr(args, "direction_oracle_ckpt", None):
102
+ args.direction_oracle_ckpt = os.path.join(base_tr2d2_path, "direction_oracle.pt")
103
+ if not getattr(args, "direction_oracle_tr2d2_checkpoint", None):
104
+ args.direction_oracle_tr2d2_checkpoint = os.path.join(
105
+ base_tr2d2_path, "pretrained", "peptune-pretrained.ckpt"
106
+ )
107
+ if not getattr(args, "direction_oracle_tokenizer_vocab", None):
108
+ args.direction_oracle_tokenizer_vocab = os.path.join(
109
+ base_tr2d2_path, "tokenizer", "new_vocab.txt"
110
+ )
111
+ if not getattr(args, "direction_oracle_tokenizer_splits", None):
112
+ args.direction_oracle_tokenizer_splits = os.path.join(
113
+ base_tr2d2_path, "tokenizer", "new_splits.txt"
114
+ )
115
+
116
+ if not getattr(args, "save_path", None):
117
+ args.save_path = os.path.join(base_tr2d2_path, "results", "validation_runs")
118
+
119
+ os.makedirs(args.save_path, exist_ok=True)
120
+ return args
121
+
122
+
123
def _build_model(args: argparse.Namespace, state_dict: Dict[str, Any], device: torch.device) -> Diffusion:
    """Build the Diffusion model from args and load checkpoint weights.

    Weights are loaded non-strictly; key mismatches are printed but not
    fatal. The returned model is on ``device`` and in eval mode.
    """
    cfg = DiffusionConfig(
        roformer=RoFormerConfig(
            hidden_size=args.hidden_dim,
            n_layers=args.num_layers,
            n_heads=args.num_heads,
        ),
        noise=NoiseConfig(),
        training=TrainingConfig(sampling_eps=args.sampling_eps),
        sampling=SamplingConfig(
            steps=args.total_num_steps,
            sampling_eps=args.sampling_eps,
        ),
        eval_cfg=EvalConfig(),
        optim=OptimConfig(lr=getattr(args, "learning_rate", 3e-4)),
        mcts=MCTSConfig(),
    )

    tokenizer = load_tokenizer(args.base_path)
    diffusion_model = Diffusion(
        config=cfg,
        tokenizer=tokenizer,
        device=device,
    ).to(device)
    outcome = diffusion_model.load_state_dict(state_dict, strict=False)
    if outcome.missing_keys:
        print(f"[load] Missing keys: {len(outcome.missing_keys)}")
    if outcome.unexpected_keys:
        print(f"[load] Unexpected keys: {len(outcome.unexpected_keys)}")
    diffusion_model.eval()
    return diffusion_model
+
155
+
156
def _build_oracle(args: argparse.Namespace, device: torch.device) -> DirectionalOracle:
    """Create a DirectionalOracle from args-provided settings (eval mode)."""
    settings = {
        "model_ckpt": args.direction_oracle_ckpt,
        "tr2d2_checkpoint": args.direction_oracle_tr2d2_checkpoint,
        "tokenizer_vocab": args.direction_oracle_tokenizer_vocab,
        "tokenizer_splits": args.direction_oracle_tokenizer_splits,
        "esm_name": args.direction_oracle_esm_name,
        "d_model": args.direction_oracle_d_model,
        "n_heads": args.direction_oracle_n_heads,
        "n_self_attn_layers": args.direction_oracle_n_self_attn_layers,
        "n_bmca_layers": args.direction_oracle_n_bmca_layers,
        "dropout": args.direction_oracle_dropout,
        "max_ligand_length": args.direction_oracle_max_ligand_length,
        "max_protein_length": args.direction_oracle_max_protein_length,
        "device": device,
        "esm_cache_dir": args.direction_oracle_esm_cache_dir,
        "esm_local_files_only": args.direction_oracle_esm_local_files_only,
    }
    built = DirectionalOracle(**settings)
    built.eval()
    return built
176
+
177
+
178
def _sample_sequences(
    model: Diffusion,
    batch_size: int,
    seq_length: int,
    total_num_steps: int,
    sampling_eps: float,
) -> torch.Tensor:
    """Run the reverse diffusion chain and return sampled token tensors.

    Starts from the model's prior (fully masked) and applies
    ``total_num_steps`` reverse steps on a time grid from 1 down to
    ``sampling_eps``.
    """
    model.backbone.eval()
    model.noise.eval()

    # Prior sample: a fully-masked batch of shape (batch_size, seq_length).
    x_rollout = model.sample_prior(batch_size, seq_length).to(model.device, dtype=torch.long)

    # total_num_steps+1 grid points, stepped uniformly from 1 to sampling_eps.
    timesteps = torch.linspace(1, sampling_eps, total_num_steps + 1, device=model.device)
    dt = torch.tensor((1 - sampling_eps) / total_num_steps, device=model.device)

    for i in range(total_num_steps):
        t = timesteps[i] * torch.ones(x_rollout.shape[0], 1, device=model.device)
        _, x_next = model.single_reverse_step(x_rollout, t=t, dt=dt)
        x_rollout = x_next.to(model.device)

    # NOTE(review): this cleanup is read as running once AFTER the loop,
    # reusing the loop's final `t` to force-decode any tokens still masked —
    # confirm against the original (flattened) indentation.
    if (x_rollout == model.mask_index).any().item():
        _, x_next = model.single_noise_removal(x_rollout, t=t, dt=dt)
        x_rollout = x_next.to(model.device)

    return x_rollout
203
+
204
+
205
def _score_sequences(reward_model, sequences: List[str]) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Score sequences with ``reward_model``, tolerating per-item failures.

    Returns four float32 arrays aligned with ``sequences``:
    (total_rewards, affinity, directions, confidence). Empty input yields
    four empty arrays. The reward model may return either a bare array or a
    ``(rewards, info)`` tuple whose ``info`` dict can carry "affinities",
    "confidences", and "directions".
    """
    if not sequences:
        empty = np.array([], dtype=np.float32)
        return empty, empty, empty, empty

    try:
        # Fast path: score the whole batch in one call.
        result = reward_model(sequences)
        if isinstance(result, tuple):
            total_rewards, info = result
            # Missing info entries fall back to the rewards themselves /
            # all-ones confidence / all-zero directions.
            affinity = np.asarray(info.get("affinities", total_rewards), dtype=np.float32)
            confidence = np.asarray(info.get("confidences", np.ones_like(affinity)), dtype=np.float32)
            directions = np.asarray(info.get("directions", np.zeros_like(affinity)), dtype=np.float32)
        else:
            total_rewards = np.asarray(result, dtype=np.float32)
            # 2-D reward matrices use the first column as affinity.
            if total_rewards.ndim > 1:
                affinity = total_rewards[:, 0]
            else:
                affinity = total_rewards
            confidence = np.ones_like(affinity, dtype=np.float32)
            directions = np.zeros_like(affinity, dtype=np.float32)
        return np.asarray(total_rewards, dtype=np.float32), affinity, directions, confidence
    except Exception:
        # Fallback: batch scoring failed — retry one sequence at a time so a
        # single bad sequence only NaNs its own row (deliberate best-effort).
        total_rewards = np.full(len(sequences), np.nan, dtype=np.float32)
        affinity = np.full(len(sequences), np.nan, dtype=np.float32)
        directions = np.full(len(sequences), np.nan, dtype=np.float32)
        confidence = np.full(len(sequences), np.nan, dtype=np.float32)
        for idx, seq in enumerate(sequences):
            try:
                result = reward_model([seq])
                if isinstance(result, tuple):
                    rewards, info = result
                    total_rewards[idx] = float(np.asarray(rewards)[0])
                    affinity[idx] = float(np.asarray(info.get("affinities", rewards))[0])
                    confidence[idx] = float(np.asarray(info.get("confidences", [np.nan]))[0])
                    directions[idx] = float(np.asarray(info.get("directions", [np.nan]))[0])
                else:
                    reward = np.asarray(result)
                    total_rewards[idx] = float(reward[0]) if reward.size else np.nan
                    affinity[idx] = total_rewards[idx]
                    # NOTE(review): directions/confidence stay NaN on this
                    # branch — presumably intentional for bare-array models.
            except Exception:
                # Leave this row as NaN and keep scoring the rest.
                continue
        return total_rewards, affinity, directions, confidence
247
+
248
+
249
+ def _compute_direction_accuracy(directions: np.ndarray, d_star: float) -> np.ndarray:
250
+ if directions.size == 0:
251
+ return directions
252
+ acc = np.full(directions.shape, np.nan, dtype=np.float32)
253
+ valid = np.isfinite(directions)
254
+ if not valid.any():
255
+ return acc
256
+ if d_star > 0:
257
+ acc[valid] = (directions[valid] >= 0.5).astype(np.float32)
258
+ else:
259
+ acc[valid] = (directions[valid] < 0.5).astype(np.float32)
260
+ return acc
261
+
262
+
263
+ def _nanmean(values: np.ndarray) -> float:
264
+ return float(np.nanmean(values)) if values.size else float("nan")
265
+
266
+
267
+ def _nanstd(values: np.ndarray) -> float:
268
+ return float(np.nanstd(values)) if values.size else float("nan")
269
+
270
+
271
def main() -> None:
    """Validate a TD3B checkpoint: sample binders per target/direction, score
    them, optionally reward-resample, and write per-sequence CSVs plus a
    printed summary.  Supports single-process and torchrun multi-GPU runs
    (targets are sharded across ranks and results all-gathered to rank 0).
    """
    parser = argparse.ArgumentParser(description="Run TD3B validation from a saved checkpoint.")
    parser.add_argument("--ckpt_path", required=True, help="Path to saved checkpoint (.ckpt)")
    parser.add_argument("--val_csv", required=True, help="Validation CSV path")
    parser.add_argument("--device", default="cuda", help="Device string (e.g., cuda:0 or cpu)")
    parser.add_argument("--base_path", default=None, help="Base path for TR2-D2")
    parser.add_argument("--save_path", default=None, help="Output directory for validation CSV")
    parser.add_argument("--epoch", type=int, default=0, help="Epoch number to label outputs")
    parser.add_argument("--val_samples_per_target", type=int, default=None, help="Samples per target")
    parser.add_argument("--num_pool", type=int, default=None,
                        help="Number of candidate sequences to sample before resampling")
    parser.add_argument("--seq_length", type=int, default=None, help="Fallback sequence length")
    parser.add_argument("--total_num_steps", type=int, default=None, help="Diffusion steps")
    parser.add_argument("--sampling_eps", type=float, default=None, help="Sampling epsilon")
    parser.add_argument("--seed", type=int, default=None, help="Base random seed")
    parser.add_argument("--no_resample", action="store_true", help="Disable reward-weighted resampling")
    parser.add_argument("--resample_without_replacement", action="store_true",
                        help="Resample without replacement when possible")
    parser.add_argument("--resample_alpha", type=float, default=None,
                        help="Override alpha for resampling weights")
    cli_args = parser.parse_args()

    # torchrun sets LOCAL_RANK/WORLD_SIZE; defaults give a single-process run.
    rank = int(os.environ.get("LOCAL_RANK", 0))
    world_size = int(os.environ.get("WORLD_SIZE", 1))

    if world_size > 1:
        setup_distributed(rank, world_size)
        device = torch.device(f"cuda:{rank}" if torch.cuda.is_available() else "cpu")
    else:
        device = torch.device(cli_args.device)

    if cli_args.seed is not None:
        # Offset the seed by rank so each shard samples different sequences.
        torch.manual_seed(cli_args.seed + rank)
        np.random.seed(cli_args.seed + rank)

    # Restore model weights + training-time config; CLI options override config.
    ckpt = _load_checkpoint(cli_args.ckpt_path, device)
    payload = _extract_state_and_config(ckpt)
    args = _build_args(payload["config"], cli_args)

    tokenizer = load_tokenizer(args.base_path)
    val_dataset = TargetDataset(args.val_csv, tokenizer=tokenizer)

    policy_model = _build_model(args, payload["state_dict"], device)

    # Shared affinity predictor; per-target views are created inside the loop.
    multi_target_affinity = MultiTargetBindingAffinity(
        tokenizer=tokenizer,
        base_path=args.base_path,
        device=device,
        emb_model=policy_model.backbone,
    )

    directional_oracle = _build_oracle(args, device)
    analyzer = PeptideAnalyzer()
    # Cache oracle protein encodings: each target is scored for both directions.
    protein_token_cache: Dict[str, torch.Tensor] = {}

    resample_enabled = not cli_args.no_resample
    resample_with_replacement = not cli_args.resample_without_replacement
    resample_alpha = cli_args.resample_alpha if cli_args.resample_alpha is not None else args.alpha

    all_targets = val_dataset.get_all_targets()
    if world_size > 1:
        # Round-robin shard of targets across ranks.
        my_targets = all_targets[rank::world_size]
    else:
        my_targets = all_targets

    records: List[Dict[str, Any]] = []
    resampled_records: List[Dict[str, Any]] = []
    # Per-direction accumulators for the summary statistics.
    resampled_affinity_pos: List[float] = []
    resampled_affinity_neg: List[float] = []
    resampled_acc_pos: List[float] = []
    resampled_acc_neg: List[float] = []
    resampled_gated_rewards: List[float] = []

    with torch.no_grad():
        for target_seq in my_targets:
            target_protein_tokens = protein_token_cache.get(target_seq)
            if target_protein_tokens is None:
                target_protein_tokens = directional_oracle.encode_protein(target_seq)
                protein_token_cache[target_seq] = target_protein_tokens

            # Evaluate both intended behaviors for every target.
            for direction_name, d_star in [("agonist", 1.0), ("antagonist", -1.0)]:
                target_length = val_dataset.get_sequence_length(target_seq, direction_name)
                # Cap at the pretrained model's max position embeddings.
                max_len = 1035
                if target_length > max_len:
                    target_length = max_len

                target_affinity = TargetSpecificBindingAffinity(multi_target_affinity, target_seq)
                reward_model = create_reward_function(
                    affinity_predictor=target_affinity,
                    directional_oracle=directional_oracle,
                    target_direction=d_star,
                    target_protein_tokens=target_protein_tokens,
                    tokenizer=tokenizer,
                    device=device,
                    min_affinity_threshold=args.min_affinity_threshold,
                    use_confidence_weighting=True,
                    temperature=args.sigmoid_temperature,
                )

                # Optionally over-sample a candidate pool that resampling
                # later reduces to val_samples_per_target.
                pool_size = args.val_samples_per_target
                if getattr(args, "num_pool", None) is not None:
                    pool_size = int(args.num_pool)
                    if pool_size < args.val_samples_per_target:
                        print(
                            f"[warn] num_pool ({pool_size}) < val_samples_per_target "
                            f"({args.val_samples_per_target}); using val_samples_per_target."
                        )
                        pool_size = args.val_samples_per_target

                x_eval = _sample_sequences(
                    policy_model,
                    batch_size=pool_size,
                    seq_length=target_length,
                    total_num_steps=args.total_num_steps,
                    sampling_eps=args.sampling_eps,
                )

                sequences = tokenizer.batch_decode(x_eval)
                valid_mask = np.array([analyzer.is_peptide(seq) for seq in sequences], dtype=bool)
                valid_fraction = float(valid_mask.mean()) if valid_mask.size else 0.0

                gated_rewards, affinities, directions, confidences = _score_sequences(reward_model, sequences)
                direction_accuracy = _compute_direction_accuracy(directions, d_star)
                # Signed agreement between oracle score and requested direction.
                consistency = d_star * (directions - 0.5)
                success_rate = direction_accuracy * valid_fraction

                if resample_enabled:
                    # Reward-weighted (softmax with temperature alpha) selection
                    # over candidates that received a finite reward.
                    finite_rewards = np.isfinite(gated_rewards)
                    if np.any(finite_rewards):
                        rewards_t = torch.as_tensor(gated_rewards[finite_rewards], device=device)
                        alpha = max(float(resample_alpha), 1e-6)
                        weights = torch.softmax(rewards_t / alpha, dim=0)
                        if resample_with_replacement:
                            num_samples = args.val_samples_per_target
                            idx = torch.multinomial(weights, num_samples=num_samples, replacement=True)
                        else:
                            # Without replacement we cannot draw more than the finite pool.
                            num_samples = min(args.val_samples_per_target, int(finite_rewards.sum()))
                            idx = torch.multinomial(weights, num_samples=num_samples, replacement=False)

                        # Map indices back into the full (unfiltered) pool.
                        valid_idx = np.where(finite_rewards)[0]
                        chosen = valid_idx[idx.detach().cpu().numpy()]
                        if d_star > 0:
                            resampled_affinity_pos.extend(affinities[chosen].tolist())
                            resampled_acc_pos.extend(direction_accuracy[chosen].tolist())
                        else:
                            resampled_affinity_neg.extend(affinities[chosen].tolist())
                            resampled_acc_neg.extend(direction_accuracy[chosen].tolist())
                        resampled_gated_rewards.extend(gated_rewards[chosen].tolist())

                        for picked in chosen.tolist():
                            resampled_records.append({
                                "target": target_seq[:20],
                                "sequence": sequences[picked],
                                "target_direction": d_star,
                                "is_valid": bool(valid_mask[picked]) if valid_mask.size else False,
                                "affinity": float(affinities[picked]) if affinities.size else np.nan,
                                "gated_reward": float(gated_rewards[picked]) if gated_rewards.size else np.nan,
                                "direction_oracle": float(directions[picked]) if directions.size else np.nan,
                                "consistency_reward": float(consistency[picked]) if consistency.size else np.nan,
                                "direction_accuracy": float(direction_accuracy[picked]) if direction_accuracy.size else np.nan,
                                "success_rate": float(success_rate[picked]) if success_rate.size else np.nan,
                            })

                # NOTE(review): `idx` here shadows the resampling tensor above —
                # harmless, but worth renaming.
                for idx, seq in enumerate(sequences):
                    records.append({
                        "target": target_seq[:20],
                        "sequence": seq,
                        "target_direction": d_star,
                        "is_valid": bool(valid_mask[idx]) if valid_mask.size else False,
                        "affinity": float(affinities[idx]) if affinities.size else np.nan,
                        "gated_reward": float(gated_rewards[idx]) if gated_rewards.size else np.nan,
                        "direction_oracle": float(directions[idx]) if directions.size else np.nan,
                        "consistency_reward": float(consistency[idx]) if consistency.size else np.nan,
                        "direction_accuracy": float(direction_accuracy[idx]) if direction_accuracy.size else np.nan,
                        "success_rate": float(success_rate[idx]) if success_rate.size else np.nan,
                    })

    # Gather per-rank full records on rank 0 (all_gather_object is collective:
    # every rank must participate).
    if world_size > 1:
        gathered: List[List[Dict[str, Any]]] = [None for _ in range(world_size)]
        dist.all_gather_object(gathered, records)
        if is_main_process():
            all_records = [item for sub in gathered for item in sub]
        else:
            all_records = []
    else:
        all_records = records

    # Gather resampled records the same way.
    if world_size > 1:
        gathered_resampled_records: List[List[Dict[str, Any]]] = [None for _ in range(world_size)]
        dist.all_gather_object(gathered_resampled_records, resampled_records)
        if is_main_process():
            all_resampled_records = [item for sub in gathered_resampled_records for item in sub]
        else:
            all_resampled_records = []
    else:
        all_resampled_records = resampled_records

    # Gather the scalar accumulators used for the printed summary.
    if world_size > 1:
        resampled_payload = {
            "aff_pos": resampled_affinity_pos,
            "aff_neg": resampled_affinity_neg,
            "acc_pos": resampled_acc_pos,
            "acc_neg": resampled_acc_neg,
            "gated": resampled_gated_rewards,
        }
        gathered_resampled = [None for _ in range(world_size)]
        dist.all_gather_object(gathered_resampled, resampled_payload)
        if is_main_process():
            resampled_affinity_pos = []
            resampled_affinity_neg = []
            resampled_acc_pos = []
            resampled_acc_neg = []
            resampled_gated_rewards = []
            # NOTE(review): `payload` shadows the checkpoint payload from above.
            for payload in gathered_resampled:
                resampled_affinity_pos.extend(payload.get("aff_pos", []))
                resampled_affinity_neg.extend(payload.get("aff_neg", []))
                resampled_acc_pos.extend(payload.get("acc_pos", []))
                resampled_acc_neg.extend(payload.get("acc_neg", []))
                resampled_gated_rewards.extend(payload.get("gated", []))

    # Only rank 0 writes CSVs and prints the summary.
    if is_main_process():
        df = pd.DataFrame(all_records)
        output_path = os.path.join(args.save_path, f"validation_epoch_{cli_args.epoch}.csv")
        df.to_csv(output_path, index=False)
        print(f"Validation sequences saved to {output_path}")

        if resample_enabled:
            if all_resampled_records:
                resampled_df = pd.DataFrame(all_resampled_records)
                resampled_path = os.path.join(args.save_path, f"validation_epoch_{cli_args.epoch}_resampled.csv")
                resampled_df.to_csv(resampled_path, index=False)
                print(f"Resampled sequences saved to {resampled_path}")
            else:
                print("Resampling enabled but no finite rewards were available to select.")

        # Summary statistics: from the resampled subset when available,
        # otherwise from the full candidate pool.
        if resample_enabled and resampled_gated_rewards:
            aff_mean_pos = _nanmean(np.asarray(resampled_affinity_pos, dtype=np.float32))
            aff_std_pos = _nanstd(np.asarray(resampled_affinity_pos, dtype=np.float32))
            acc_mean_pos = _nanmean(np.asarray(resampled_acc_pos, dtype=np.float32))
            acc_std_pos = _nanstd(np.asarray(resampled_acc_pos, dtype=np.float32))

            aff_mean_neg = _nanmean(np.asarray(resampled_affinity_neg, dtype=np.float32))
            aff_std_neg = _nanstd(np.asarray(resampled_affinity_neg, dtype=np.float32))
            acc_mean_neg = _nanmean(np.asarray(resampled_acc_neg, dtype=np.float32))
            acc_std_neg = _nanstd(np.asarray(resampled_acc_neg, dtype=np.float32))

            gated = np.asarray(resampled_gated_rewards, dtype=np.float32)
            gated_mean = _nanmean(gated)
            gated_std = _nanstd(gated)
        else:
            def _stats_for_direction(d_star: float) -> Tuple[float, float, float, float]:
                # Mean/std of affinity and direction accuracy for one direction.
                subset = df[df["target_direction"] == d_star]
                affinity = subset["affinity"].to_numpy(dtype=np.float32)
                direction_acc = subset["direction_accuracy"].to_numpy(dtype=np.float32)
                return _nanmean(affinity), _nanstd(affinity), _nanmean(direction_acc), _nanstd(direction_acc)

            aff_mean_pos, aff_std_pos, acc_mean_pos, acc_std_pos = _stats_for_direction(1.0)
            aff_mean_neg, aff_std_neg, acc_mean_neg, acc_std_neg = _stats_for_direction(-1.0)
            gated = df["gated_reward"].to_numpy(dtype=np.float32)
            gated_mean = _nanmean(gated)
            gated_std = _nanstd(gated)

        print("Validation summary")
        print(f" Affinity (d*=1): {aff_mean_pos:.4f} ± {aff_std_pos:.4f}")
        print(f" Affinity (d*=-1): {aff_mean_neg:.4f} ± {aff_std_neg:.4f}")
        print(f" Direction Accuracy (d*=1): {acc_mean_pos:.4f} ± {acc_std_pos:.4f}")
        print(f" Direction Accuracy (d*=-1): {acc_mean_neg:.4f} ± {acc_std_neg:.4f}")
        print(f" Gated Reward (overall): {gated_mean:.4f} ± {gated_std:.4f}")

    if world_size > 1:
        cleanup_distributed()


if __name__ == "__main__":
    main()

# Running command:
# CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 torchrun --nproc_per_node=8 --master_port=29501 run_validation_td3b.py --ckpt_path To Be Added --val_csv To Be Added --device cuda:0 --save_path To Be Added --epoch 99 --val_samples_per_target 8 --seed 42 --resample_alpha 0.1
baselines/sampling_setup.py ADDED
@@ -0,0 +1,538 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+ import sys
4
+ import time
5
+ from dataclasses import dataclass
6
+ from typing import Dict, List, Optional
7
+
8
+ import numpy as np
9
+ ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
10
+ if ROOT_DIR not in sys.path:
11
+ sys.path.insert(0, ROOT_DIR)
12
+
13
+ import torch
14
+ from hydra import compose, initialize_config_dir
15
+ from hydra.core.global_hydra import GlobalHydra
16
+
17
+ from diffusion import Diffusion
18
+ from scoring.scoring_functions import ScoringFunctions
19
+ from scoring.functions.binding import MultiTargetBindingAffinity
20
+ from td3b.direction_oracle import DirectionalOracle, resolve_device
21
+ from td3b.data_utils import peptide_seq_to_smiles, smiles_token_length
22
+
23
+ from baselines.baselines import (
24
+ RewardInputs,
25
+ RewardWrapper,
26
+ classifier_guidance,
27
+ peptune_mctg_sampling,
28
+ sequential_monte_carlo,
29
+ twisted_diffusion_sampler,
30
+ unguided_sampling,
31
+ )
32
+
33
+
34
AMINO_ACIDS = "ACDEFGHIKLMNPQRSTVWY"


@dataclass
class ProteinTokenizer:
    """Minimal residue-level tokenizer mapping one-letter amino-acid codes to ids."""

    # Residue letter -> integer id.
    aa_to_id: Dict[str, int]
    # Id used for padding and for residues missing from the vocabulary.
    pad_id: int = 0

    @classmethod
    def default(cls) -> "ProteinTokenizer":
        """Build the canonical 20-residue vocabulary (ids 1..20, pad/unknown = 0)."""
        vocab = {residue: position + 1 for position, residue in enumerate(AMINO_ACIDS)}
        return cls(aa_to_id=vocab, pad_id=0)

    def encode(self, seq: str) -> torch.Tensor:
        """Encode ``seq`` as a (1, len(seq)) long tensor; unknown residues map to pad_id."""
        lookup = self.aa_to_id.get
        token_ids = [lookup(residue, self.pad_id) for residue in seq]
        return torch.tensor([token_ids], dtype=torch.long)
50
+
51
+
52
def load_base_model(
    ckpt_path: str,
    device: str,
    config_name: str = "peptune_config.yaml",
) -> Diffusion:
    """Load the pre-trained Diffusion (MDLM) model in eval mode.

    Tries the Lightning ``load_from_checkpoint`` path first; if that fails
    (e.g. for a raw state-dict checkpoint), rebuilds the model from the Hydra
    config and loads the state dict non-strictly.
    """
    # Hydra keeps global state; clear any earlier initialization before composing.
    GlobalHydra.instance().clear()
    config_dir = os.path.join(os.path.dirname(__file__), "..", "configs")
    initialize_config_dir(config_dir=config_dir, job_name="load_model")
    cfg = compose(config_name=config_name)
    try:
        model = Diffusion.load_from_checkpoint(
            ckpt_path,
            config=cfg,
            mode="eval",
            device=device,
            map_location=device,
        )
        model.eval()
        return model
    except Exception as exc:
        # Fall through to a manual state-dict load for non-Lightning checkpoints.
        print(f"[load_base_model] Lightning load failed, falling back to raw state_dict: {exc}")

    # weights_only=False: the checkpoint may contain pickled non-tensor objects.
    checkpoint = torch.load(ckpt_path, map_location=device, weights_only=False)
    if isinstance(checkpoint, dict):
        # Accept the two common wrapper keys, else assume the dict IS the state dict.
        if "model_state_dict" in checkpoint:
            state_dict = checkpoint["model_state_dict"]
        elif "state_dict" in checkpoint:
            state_dict = checkpoint["state_dict"]
        else:
            state_dict = checkpoint
    else:
        raise ValueError(f"Unsupported checkpoint format: {type(checkpoint)}")

    model = Diffusion(
        config=cfg,
        mode="eval",
        device=device,
    )
    # strict=False tolerates renamed/extra keys; surface the counts for review.
    missing, unexpected = model.load_state_dict(state_dict, strict=False)
    if missing:
        print(f"[load_base_model] Missing keys: {len(missing)}")
    if unexpected:
        print(f"[load_base_model] Unexpected keys: {len(unexpected)}")
    model.eval()
    model.to(device)
    return model
98
+
99
+
100
def load_reward_models(
    prot_seq: Optional[str],
    device: str,
    base_model: Optional[Diffusion] = None,
    base_path: Optional[str] = None,
    multi_target: bool = False,
    score_func_names: Optional[List[str]] = None,
):
    """Construct the reward scorer used during sampling.

    With ``multi_target=True`` returns a ``MultiTargetBindingAffinity`` built
    from the base model's tokenizer/backbone (``base_model`` and ``base_path``
    required).  Otherwise returns a ``ScoringFunctions`` bundle for the single
    target ``prot_seq``, defaulting to the five standard peptide scorers.
    """
    if multi_target:
        if base_model is None or base_path is None:
            raise ValueError("base_model and base_path are required for multi-target affinity.")
        return MultiTargetBindingAffinity(
            tokenizer=base_model.tokenizer,
            base_path=base_path,
            device=device,
            emb_model=base_model.backbone,
        )

    if prot_seq is None:
        raise ValueError("prot_seq is required for single-target scoring.")
    names = score_func_names
    if names is None:
        # Default scorer set for single-target sampling.
        names = [
            "binding_affinity1",
            "solubility",
            "hemolysis",
            "nonfouling",
            "permeability",
        ]
    return ScoringFunctions(names, prot_seqs=[prot_seq], device=device)
128
+
129
+
130
def load_direction_oracle(args, device: str) -> DirectionalOracle:
    """Instantiate the Direction Oracle from the ``direction_oracle_*`` CLI
    arguments and switch it to eval mode."""
    oracle_kwargs = dict(
        model_ckpt=args.direction_oracle_ckpt,
        tr2d2_checkpoint=args.direction_oracle_tr2d2_checkpoint,
        tokenizer_vocab=args.direction_oracle_tokenizer_vocab,
        tokenizer_splits=args.direction_oracle_tokenizer_splits,
        esm_name=args.direction_oracle_esm_name,
        d_model=args.direction_oracle_d_model,
        n_heads=args.direction_oracle_n_heads,
        n_self_attn_layers=args.direction_oracle_n_self_attn_layers,
        n_bmca_layers=args.direction_oracle_n_bmca_layers,
        dropout=args.direction_oracle_dropout,
        max_ligand_length=args.direction_oracle_max_ligand_length,
        max_protein_length=args.direction_oracle_max_protein_length,
        device=device,
        esm_cache_dir=args.direction_oracle_esm_cache_dir,
        esm_local_files_only=args.direction_oracle_esm_local_files_only,
    )
    oracle = DirectionalOracle(**oracle_kwargs)
    oracle.eval()
    return oracle
150
+
151
+
152
def run_baseline(
    baseline: str,
    base_model: Diffusion,
    reward_fn: RewardWrapper,
    batch_size: int,
    seq_length: int,
    num_steps: int,
    guidance_scale: float,
    alpha: float,
    guidance_steps: Optional[int],
    mcts_iterations: int,
    num_children: int,
    sample_prob_weight: float,
    invalid_penalty: float,
    pareto_max_size: Optional[int],
) -> Dict[str, torch.Tensor]:
    """Dispatch to the requested baseline sampler (case-insensitive name).

    Raises ``ValueError`` for an unrecognized baseline name.
    """
    # Arguments shared by every sampler.
    common = dict(batch_size=batch_size, seq_length=seq_length, num_steps=num_steps)
    name = baseline.lower()

    if name == "cg":
        return classifier_guidance(
            base_model,
            reward_fn,
            guidance_scale=guidance_scale,
            guidance_steps=guidance_steps,
            **common,
        )
    if name == "unguided":
        return unguided_sampling(base_model, **common)
    if name == "smc":
        return sequential_monte_carlo(base_model, reward_fn, alpha=alpha, **common)
    if name == "tds":
        return twisted_diffusion_sampler(
            base_model,
            reward_fn,
            guidance_scale=guidance_scale,
            alpha=alpha,
            guidance_steps=guidance_steps,
            **common,
        )
    if name == "peptune":
        return peptune_mctg_sampling(
            base_model,
            reward_fn,
            mcts_iterations=mcts_iterations,
            num_children=num_children,
            alpha=alpha,
            sample_prob_weight=sample_prob_weight,
            invalid_penalty=invalid_penalty,
            pareto_max_size=pareto_max_size,
            **common,
        )
    raise ValueError(f"Unknown baseline: {name}")
221
+
222
+
223
def main():
    """Run one baseline sampler over one target (or a CSV of targets), score
    the generated binders for affinity and direction, and write per-sample,
    per-batch-timing, and per-batch-metric CSVs.

    Supports manual sharding (--shard_id/--num_shards) or torchrun env vars.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--ckpt_path", type=str, required=True)
    parser.add_argument("--device", type=str, default="cuda:0")
    parser.add_argument("--baseline", type=str, default="cg", choices=["cg", "smc", "tds", "unguided", "peptune"])
    parser.add_argument("--prot_seq", type=str, default=None)
    parser.add_argument("--targets_csv", type=str, default=None)
    parser.add_argument("--d_star", type=float, default=1.0)
    parser.add_argument("--batch_size", type=int, default=32)
    parser.add_argument("--seq_length", type=int, default=200)
    parser.add_argument("--binder_seq", type=str, default=None)
    parser.add_argument("--num_steps", type=int, default=128)
    parser.add_argument("--guidance_scale", type=float, default=1.0)
    parser.add_argument("--alpha", type=float, default=0.1)
    parser.add_argument("--reward_alpha", type=float, default=None)
    parser.add_argument("--mcts_iterations", type=int, default=20)
    parser.add_argument("--num_children", type=int, default=24)
    parser.add_argument("--sample_prob_weight", type=float, default=0.1)
    parser.add_argument("--invalid_penalty", type=float, default=1.0)
    parser.add_argument("--pareto_max_size", type=int, default=None)
    parser.add_argument("--guidance_steps", type=int, default=None)
    parser.add_argument("--fast_direction", action="store_true", default=False)
    parser.add_argument("--num_batches", type=int, default=1)
    parser.add_argument("--output_dir", type=str, default=None)
    parser.add_argument("--shard_id", type=int, default=None)
    parser.add_argument("--num_shards", type=int, default=None)
    parser.add_argument("--direction_oracle_ckpt", type=str, default=None)
    parser.add_argument("--direction_oracle_tr2d2_checkpoint", type=str, default=None)
    parser.add_argument("--direction_oracle_tokenizer_vocab", type=str, default=None)
    parser.add_argument("--direction_oracle_tokenizer_splits", type=str, default=None)
    parser.add_argument("--direction_oracle_esm_name", type=str, default="facebook/esm2_t33_650M_UR50D")
    parser.add_argument("--direction_oracle_esm_cache_dir", type=str, default=None)
    parser.add_argument("--direction_oracle_esm_local_files_only", action="store_true", default=False)
    parser.add_argument("--direction_oracle_max_ligand_length", type=int, default=768)
    parser.add_argument("--direction_oracle_max_protein_length", type=int, default=1024)
    parser.add_argument("--direction_oracle_d_model", type=int, default=256)
    parser.add_argument("--direction_oracle_n_heads", type=int, default=4)
    parser.add_argument("--direction_oracle_n_self_attn_layers", type=int, default=1)
    parser.add_argument("--direction_oracle_n_bmca_layers", type=int, default=2)
    parser.add_argument("--direction_oracle_dropout", type=float, default=0.3)
    args = parser.parse_args()

    # Sharding: torchrun env vars take precedence over --shard_id/--num_shards.
    rank_env = os.environ.get("LOCAL_RANK")
    world_env = os.environ.get("WORLD_SIZE")
    if rank_env is not None or world_env is not None:
        rank = int(rank_env or 0)
        world_size = int(world_env or 1)
    else:
        rank = int(args.shard_id) if args.shard_id is not None else 0
        world_size = int(args.num_shards) if args.num_shards is not None else 1
    if world_size < 1:
        world_size = 1
    # When sharded with a generic device string, pin each shard to its own GPU.
    if world_size > 1 and str(args.device).lower() in {"cuda", "cuda:0", "auto"}:
        args.device = f"cuda:{rank}"

    resolved_device = resolve_device(args.device)
    args.device = str(resolved_device)

    # Default oracle/tokenizer paths are resolved relative to the repo root.
    tr2d2_root = ROOT_DIR
    if args.direction_oracle_ckpt is None:
        args.direction_oracle_ckpt = os.path.join(
            tr2d2_root, "direction_oracle.pt"
        )
    if args.direction_oracle_tr2d2_checkpoint is None:
        args.direction_oracle_tr2d2_checkpoint = os.path.join(
            tr2d2_root, "pretrained", "peptune-pretrained.ckpt"
        )
    if args.direction_oracle_tokenizer_vocab is None:
        args.direction_oracle_tokenizer_vocab = os.path.join(
            tr2d2_root, "tokenizer", "new_vocab.txt"
        )
    if args.direction_oracle_tokenizer_splits is None:
        args.direction_oracle_tokenizer_splits = os.path.join(
            tr2d2_root, "tokenizer", "new_splits.txt"
        )

    if args.targets_csv is None and args.prot_seq is None:
        raise ValueError("--prot_seq is required when --targets_csv is not provided.")

    base_model = load_base_model(args.ckpt_path, args.device)
    base_path = os.path.abspath(os.path.join(ROOT_DIR, ".."))
    multi_target = args.targets_csv is not None
    scoring_fn = load_reward_models(
        args.prot_seq if not multi_target else None,
        args.device,
        base_model=base_model,
        base_path=base_path,
        multi_target=multi_target,
    )
    direction_oracle = load_direction_oracle(args, args.device)
    reward_alpha = args.reward_alpha if args.reward_alpha is not None else args.alpha

    # Build the target list: either from the CSV or a single CLI-provided pair.
    if args.targets_csv:
        import pandas as pd

        df = pd.read_csv(args.targets_csv)
        if "Target_Sequence" not in df.columns:
            raise ValueError("targets_csv must contain a 'Target_Sequence' column.")
        if "Ligand_Sequence" not in df.columns:
            raise ValueError("targets_csv must contain a 'Ligand_Sequence' column.")

        targets = []
        for row_idx, row in df.iterrows():
            target_seq = str(row["Target_Sequence"]) if pd.notna(row["Target_Sequence"]) else None
            if not target_seq:
                continue
            # NaN or blank ligand means "no reference binder" for this target.
            binder_seq = row["Ligand_Sequence"]
            if pd.isna(binder_seq):
                binder_seq = None
            else:
                binder_seq = str(binder_seq)
                if binder_seq.strip() == "":
                    binder_seq = None
            targets.append(
                {
                    "target_seq": target_seq,
                    "binder_seq": binder_seq,
                    "row_index": int(row_idx),
                }
            )
    else:
        targets = [{"target_seq": args.prot_seq, "binder_seq": args.binder_seq, "row_index": 0}]

    if world_size > 1:
        # Round-robin split of targets across shards.
        targets = [item for idx, item in enumerate(targets) if idx % world_size == rank]
        print(f"[shard] rank {rank}/{world_size}: {len(targets)} targets")

    output_dir = args.output_dir
    if output_dir is None:
        output_dir = os.path.join(os.path.dirname(__file__), "outputs")
    os.makedirs(output_dir, exist_ok=True)

    # Imported lazily to avoid pulling the analyzer stack in at module import.
    from utils.app import PeptideAnalyzer

    analyzer = PeptideAnalyzer()
    all_rows = []
    batch_rows = []
    metrics_rows = []

    def resolve_seq_length(binder_seq: Optional[str]) -> int:
        # Derive the generation length from the reference binder's SMILES
        # token count; fall back to --seq_length on any failure.
        if not binder_seq:
            return args.seq_length
        try:
            smiles = peptide_seq_to_smiles(binder_seq)
            if not smiles:
                return args.seq_length
            if base_model.tokenizer is None:
                return len(smiles)
            return smiles_token_length(smiles, base_model.tokenizer)
        except Exception as exc:
            print(f"Warning: failed to derive seq_length from binder_seq; using {args.seq_length}. Error: {exc}")
            return args.seq_length

    for target_idx, target_info in enumerate(targets):
        target_seq = target_info["target_seq"]
        binder_seq = target_info.get("binder_seq")
        row_index = target_info.get("row_index", target_idx)
        seq_length = resolve_seq_length(binder_seq)
        protein_tokens = direction_oracle.encode_protein(target_seq)
        # Every target is sampled for both behaviors.
        for direction_name, d_star in [("agonist", 1.0), ("antagonist", -1.0)]:

            reward_inputs = RewardInputs(
                protein_tokens=protein_tokens,
                d_star=d_star,
                protein_seq=target_seq,
            )
            reward_fn = RewardWrapper(
                scoring_fn=scoring_fn,
                direction_oracle=direction_oracle,
                base_model=base_model,
                tokenizer=base_model.tokenizer,
                reward_inputs=reward_inputs,
                device=torch.device(args.device),
                fast_direction=args.fast_direction,
                reward_alpha=reward_alpha,
            )

            # Multi-target runs do one batch per (target, direction).
            num_batches = 1 if multi_target else args.num_batches
            for batch_idx in range(num_batches):
                start = time.perf_counter()
                result = run_baseline(
                    args.baseline,
                    base_model,
                    reward_fn,
                    batch_size=args.batch_size,
                    seq_length=seq_length,
                    num_steps=args.num_steps,
                    guidance_scale=args.guidance_scale,
                    alpha=args.alpha,
                    guidance_steps=args.guidance_steps,
                    mcts_iterations=args.mcts_iterations,
                    num_children=args.num_children,
                    sample_prob_weight=args.sample_prob_weight,
                    invalid_penalty=args.invalid_penalty,
                    pareto_max_size=args.pareto_max_size,
                )
                elapsed = time.perf_counter() - start

                # Score the final tokens with an all-ones attention mask.
                scores = reward_fn.evaluate_tokens(
                    result["tokens"],
                    torch.ones_like(result["tokens"], device=result["tokens"].device),
                )
                sequences = scores["sequences"]
                affinity = scores["affinity"].detach().cpu().numpy()
                direction = scores["direction"].detach().cpu().numpy()
                gated_reward = scores["gated_reward"].detach().cpu().numpy()
                valid_mask = np.array([analyzer.is_peptide(seq) for seq in sequences], dtype=np.float32)
                valid_fraction = float(valid_mask.mean()) if len(valid_mask) else 0.0
                # Signed agreement between oracle score and requested direction.
                consistency = d_star * (direction - 0.5)
                if d_star > 0:
                    direction_correct = (direction >= 0.5).astype(np.float32)
                else:
                    direction_correct = (direction < 0.5).astype(np.float32)
                # "Success" requires both correct direction and a valid peptide.
                success = direction_correct * valid_mask
                direction_mean = float(np.mean(direction))
                direction_std = float(np.std(direction))
                affinity_mean = float(np.mean(affinity))
                affinity_std = float(np.std(affinity))
                consistency_mean = float(np.mean(consistency))
                consistency_std = float(np.std(consistency))
                gated_reward_mean = float(np.mean(gated_reward))
                gated_reward_std = float(np.std(gated_reward))
                direction_acc_mean = float(np.mean(direction_correct))
                direction_acc_std = float(np.std(direction_correct))
                success_rate_mean = float(np.mean(success))
                success_rate_std = float(np.std(success))
                batch_metrics = {
                    "direction_mean": direction_mean,
                    "direction_std": direction_std,
                    "affinity_mean": affinity_mean,
                    "affinity_std": affinity_std,
                    "consistency_mean": consistency_mean,
                    "consistency_std": consistency_std,
                    "gated_reward_mean": gated_reward_mean,
                    "gated_reward_std": gated_reward_std,
                    "direction_accuracy_mean": direction_acc_mean,
                    "direction_accuracy_std": direction_acc_std,
                    "valid_fraction": valid_fraction,
                    "success_rate_mean": success_rate_mean,
                    "success_rate_std": success_rate_std,
                }

                # One row per generated sequence (batch-level metrics repeated).
                for i, seq in enumerate(sequences):
                    all_rows.append(
                        {
                            "rank": rank,
                            "sequence": seq,
                            "affinity": float(affinity[i]),
                            "direction": float(direction[i]),
                            "d_star": float(d_star),
                            "direction_name": direction_name,
                            "target_seq": target_seq,
                            "target_index": target_idx,
                            "row_index": row_index,
                            "binder_seq": binder_seq,
                            "seq_length": seq_length,
                            "gated_reward": float(gated_reward[i]),
                            "consistency_reward": float(consistency[i]),
                            "direction_accuracy": float(direction_correct[i]),
                            "valid": float(valid_mask[i]),
                            "success": float(success[i]),
                            "batch_index": batch_idx,
                            "batch_time_sec": elapsed,
                            **batch_metrics,
                        }
                    )
                batch_rows.append(
                    {
                        "rank": rank,
                        "batch_index": batch_idx,
                        "batch_time_sec": elapsed,
                        "target_index": target_idx,
                        "row_index": row_index,
                        "binder_seq": binder_seq,
                        "seq_length": seq_length,
                        "direction_name": direction_name,
                    }
                )
                metrics_rows.append(
                    {
                        "rank": rank,
                        "target_index": target_idx,
                        "target_seq": target_seq,
                        "row_index": row_index,
                        "binder_seq": binder_seq,
                        "seq_length": seq_length,
                        "direction_name": direction_name,
                        "d_star": float(d_star),
                        "batch_index": batch_idx,
                        "num_samples": len(sequences),
                        **batch_metrics,
                    }
                )
                print(
                    f"Target {target_idx} dir {direction_name}: "
                    f"generated {len(sequences)} sequences in {elapsed:.3f}s"
                )

    import pandas as pd

    # Per-rank output files when sharded so writers never collide.
    if world_size > 1:
        output_csv = os.path.join(output_dir, f"{args.baseline}_samples_rank{rank}.csv")
        batch_csv = os.path.join(output_dir, f"batch_times_rank{rank}.csv")
        metrics_csv = os.path.join(output_dir, f"{args.baseline}_metrics_rank{rank}.csv")
    else:
        output_csv = os.path.join(output_dir, f"{args.baseline}_samples.csv")
        batch_csv = os.path.join(output_dir, "batch_times.csv")
        metrics_csv = os.path.join(output_dir, f"{args.baseline}_metrics.csv")
    pd.DataFrame(all_rows).to_csv(output_csv, index=False)
    pd.DataFrame(batch_rows).to_csv(batch_csv, index=False)
    pd.DataFrame(metrics_rows).to_csv(metrics_csv, index=False)

    print(f"Saved samples to {output_csv}")


if __name__ == "__main__":
    main()
configs/finetune_config.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Shared Configuration Classes for TD3B Finetuning
3
+
4
+ This module contains all configuration dataclasses used by both:
5
+ - finetune_v1.py (single-target training)
6
+ - finetune_multi_target.py (multi-target training)
7
+
8
+ Extracted to avoid code duplication and ensure consistency.
9
+ """
10
+
11
+ from dataclasses import dataclass
12
+ from typing import Optional
13
+
14
+
15
@dataclass
class RoFormerConfig:
    """Architecture hyperparameters for the RoFormer backbone."""
    hidden_size: int
    n_layers: int
    n_heads: int
    max_position_embeddings: int = 1035  # Must match pretrained model


@dataclass(frozen=True)
class NoiseConfig:
    """Noise-schedule settings for the diffusion process."""
    type: str = 'loglinear'
    sigma_min: float = 1e-4
    sigma_max: float = 20.0


@dataclass(frozen=True)
class TrainingConfig:
    """Training-time parameters."""
    sampling_eps: float


@dataclass(frozen=True)
class SamplingConfig:
    """Sampling-time parameters."""
    steps: int
    sampling_eps: float
    predictor: str = 'ddpm_cache'


@dataclass(frozen=True)
class EvalConfig:
    """Evaluation parameters."""
    gen_ppl_eval_model_name_or_path: str = 'gpt2-large'


@dataclass(frozen=True)
class OptimConfig:
    """Optimizer parameters."""
    lr: float


@dataclass(frozen=True)
class MCTSConfig:
    """MCTS parameters."""
    sampling: int = 0  # 0 for Gumbel sampling


class DiffusionConfig:
    """
    Complete configuration for the Diffusion model.

    Converts the typed dataclasses above into lightweight attribute
    namespaces (e.g. ``cfg.roformer.hidden_size``) so existing Diffusion
    code that expects nested attribute access keeps working, and pins the
    parameters that are fixed for this project.
    """

    @staticmethod
    def _namespace(name, **attrs):
        # Anonymous mutable object exposing `attrs` as plain attributes;
        # identical in behavior to `type(name, (), attrs)()` used elsewhere.
        return type(name, (), attrs)()

    def __init__(
        self,
        roformer: RoFormerConfig,
        noise: NoiseConfig,
        training: TrainingConfig,
        sampling: SamplingConfig,
        eval_cfg: EvalConfig,
        optim: OptimConfig,
        mcts: MCTSConfig
    ):
        make = self._namespace

        # Anonymous objects for backward compatibility with nested-attribute access.
        self.roformer = make(
            'RoFormerObj',
            hidden_size=roformer.hidden_size,
            n_layers=roformer.n_layers,
            n_heads=roformer.n_heads,
            max_position_embeddings=roformer.max_position_embeddings,
        )
        self.noise = make(
            'NoiseObj',
            type=noise.type,
            sigma_min=noise.sigma_min,
            sigma_max=noise.sigma_max,
        )
        self.training = make('TrainingObj', sampling_eps=training.sampling_eps)
        self.sampling = make(
            'SamplingObj',
            steps=sampling.steps,
            sampling_eps=sampling.sampling_eps,
            predictor=sampling.predictor,
        )
        self.eval = make(
            'EvalObj',
            gen_ppl_eval_model_name_or_path=eval_cfg.gen_ppl_eval_model_name_or_path,
        )
        self.optim = make('OptimObj', lr=optim.lr)
        self.mcts = make('MCTSObj', sampling=mcts.sampling)

        # Fixed parameters
        self.backbone = 'roformer'
        self.parameterization = 'subs'
        self.time_conditioning = False
        self.T = 0
configs/peptune_config.yaml ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ noise:
2
+ type: loglinear
3
+ sigma_min: 1e-4
4
+ sigma_max: 20
5
+ state_dependent: True
6
+
7
+ mode: ppl_eval # train / ppl_eval / sample_eval
8
+ diffusion: absorbing_state
9
+ vocab: old_smiles # old_smiles / new_smiles / selfies / helm
10
+ backbone: roformer # peptideclm / helmgpt / dit / roformer / finetune_roformer
11
+ parameterization: subs # subs
12
+ time_conditioning: False
13
+ T: 0 # 0 (continuous time) / 1000
14
+ subs_masking: False
15
+
16
+ seed: 42
17
+
18
+ mcts:
19
+ num_children: 50
20
+ num_objectives: 5
21
+ topk: 100
22
+ mask_token: 4
23
+ num_iter: 128
24
+ sampling: 0 # 0 is gumbel sampling / > 0 samples children from top k probs
25
+ invalid_penalty: 0.5
26
+ sample_prob: 1.0
27
+ perm: True
28
+ dual: False
29
+ single: False
30
+ time_dependent: True
31
+
32
+ lr_scheduler:
33
+ _target_: transformers.get_constant_schedule_with_warmup
34
+ num_warmup_steps: 2500
35
+
36
+ data:
37
+ train: To Be Added
38
+ valid: To Be Added
39
+ batching: wrapping # padding / wrapping
40
+
41
+ loader:
42
+ global_batch_size: 64
43
+ eval_global_batch_size: ${.global_batch_size}
44
+ # Note: batch_size and eval_batch_size are **per machine**
45
+ batch_size: ${div_up:${.global_batch_size}, ${eval:${trainer.devices} * ${trainer.num_nodes}}}
46
+ eval_batch_size: ${div_up:${.eval_global_batch_size}, ${eval:${trainer.devices} * ${trainer.num_nodes}}}
47
+ num_workers: ${eval:"len(__import__('os').sched_getaffinity(0))"}
48
+ pin_memory: True
49
+
50
+ sampling:
51
+ predictor: ddpm_cache # analytic, ddpm, ddpm_cache
52
+ num_sequences: 100
53
+ sampling_eps: 1e-3
54
+ steps: 128
55
+ seq_length: 100
56
+ noise_removal: True
57
+ num_sample_batches: 2 # Total samples: `num_gpus` * `loader.eval_batch_size` * num_sample_batches
58
+ num_sample_log: 2
59
+ stride_length: 1
60
+ num_strides: 1
61
+
62
+ training:
63
+ antithetic_sampling: True
64
+ sampling_eps: 1e-3
65
+ focus_mask: False
66
+ #dynamic_batching: True
67
+ accumulator: False
68
+
69
+ eval:
70
+ checkpoint_path:
71
+ disable_ema: False
72
+ compute_generative_perplexity: False
73
+ perplexity_batch_size: 8
74
+ compute_perplexity_on_sanity: False
75
+ gen_ppl_eval_model_name_or_path: gpt2-large # gpt2-large, meta-llama/Llama-2-7b-hf
76
+ generate_samples: True
77
+ generation_model:
78
+
79
+ optim:
80
+ weight_decay: 0.075
81
+ lr: 3e-4
82
+ beta1: 0.9
83
+ beta2: 0.999
84
+ eps: 1e-8
85
+
86
+ pepclm:
87
+ hidden_size: 768
88
+ cond_dim: 256
89
+ n_heads: 20
90
+ n_blocks: 4
91
+ dropout: 0.5
92
+ length: 512
93
+ #scale_by_sigma: True
94
+
95
+ model:
96
+ type: ddit
97
+ hidden_size: 768
98
+ cond_dim: 128
99
+ length: 512
100
+ n_blocks: 12
101
+ n_heads: 12
102
+ scale_by_sigma: True
103
+ dropout: 0.1
104
+
105
+ roformer:
106
+ hidden_size: 768
107
+ n_layers: 8
108
+ n_heads: 8
109
+ max_position_embeddings: 1035
110
+
111
+ helmgpt:
112
+ hidden_size: 256
113
+ embd_pdrop: 0.1
114
+ resid_pdrop: 0.1
115
+ attn_pdrop: 0.1
116
+ ff_dropout: 0.
117
+ block_size: 140
118
+ n_layer: 8
119
+ n_heads: 8
120
+
121
+
122
+ trainer:
123
+ _target_: lightning.Trainer
124
+ accelerator: cuda
125
+ num_nodes: 1
126
+ devices: ${device_count:}
127
+ accumulate_grad_batches: ${div_up:${loader.global_batch_size}, ${eval:${trainer.devices} * ${loader.batch_size} * ${trainer.num_nodes}}}
128
+ gradient_clip_val: 1.0
129
+ precision: 64-true
130
+ num_sanity_val_steps: 2
131
+ max_epochs: 100
132
+ max_steps: 1_000_000
133
+ log_every_n_steps: 10
134
+ limit_train_batches: 1.0 # train on full dataset, can be used to toggle quick run
135
+ limit_val_batches: 1.0 # validate on full dataset, can be used to toggle quick run
136
+ #val_check_interval: 40 #954
137
+ check_val_every_n_epoch: 1
138
+
139
+ hydra:
140
+ run:
141
+ dir: ./${now:%Y.%m.%d}/
142
+ job:
143
+ chdir: True
144
+
145
+ checkpointing:
146
+ # Use custom `save_dir` if, e.g., saving to S3 bucket, otherwise leave this parameter as is
147
+ save_dir: ${cwd:}
148
+ # Note: `checkpoints` path should correspond to `checkpoint_every_n_steps.dirpath`
149
+ resume_from_ckpt: True
150
+ resume_ckpt_path:
151
+
152
+ callbacks:
153
+ model_checkpoint:
154
+ _target_: pytorch_lightning.callbacks.ModelCheckpoint
155
+ every_n_epochs: 1
156
+ monitor: "val/nll"
157
+ save_top_k: 10
158
+ mode: "min"
159
+ dirpath:
diffusion.py ADDED
@@ -0,0 +1,1588 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import sys
3
+ import itertools
4
+ import time
5
+ import torch
6
+ from torch import Tensor
7
+ import math
8
+ import torch.nn.functional as F
9
+ import numpy as np
10
+ import random as rd
11
+ import lightning as L
12
+ import torchmetrics
13
+ from dataclasses import dataclass
14
+ import gc
15
+ import utils.utils as utils
16
+
17
+ from tokenizer.my_tokenizers import SMILES_SPE_Tokenizer
18
+ import noise_schedule
19
+ from torch.optim.lr_scheduler import _LRScheduler
20
+ import roformer as roformer
21
+ from utils.app import PeptideAnalyzer
22
+ import pandas as pd
23
+
24
+ base_path = 'To Be Added'
25
+
26
+ def _sample_categorical(categorical_probs):
27
+ gumbel_norm = (
28
+ 1e-10
29
+ - (torch.rand_like(categorical_probs) + 1e-10).log())
30
+ return (categorical_probs / gumbel_norm).argmax(dim=-1).to(dtype=torch.long)
31
+
32
+ def _sample_categorical_gradient(categorical_probs, temp = 1.0):
33
+ gumbel_norm = (
34
+ 1e-10 - (torch.rand_like(categorical_probs) + 1e-10).log())
35
+ output = torch.nn.functional.softmax((torch.log(categorical_probs)-torch.log(gumbel_norm))/temp, 2)
36
+ return output
37
+
38
+ def _unsqueeze(x, reference):
39
+ return x.view(
40
+ * x.shape,
41
+ * ((1,) * (len(reference.shape) - len(x.shape))))
42
+
43
def sample_batched_categorical(categorical_probs, batch_size):
    """
    Generates `batch_size` sequences sampled from categorical probabilities
    using the Gumbel-max trick, which ensures randomness while following
    the given per-position probabilities.

    Args:
        categorical_probs (torch.Tensor): tensor of shape
            (1, sequence_length, vocab_size) — a batch dim of 1 that is
            broadcast across the sampled batch — with per-position
            categorical probabilities.
        batch_size (int): number of sequences to sample.

    Returns:
        torch.Tensor: tensor of shape (batch_size, sequence_length) of
        sampled category indices (dtype long).
    """
    _, sequence_length, vocab_size = categorical_probs.shape

    # add Gumbel noise and sample batch_size sequences; create the noise
    # directly on the input's device to avoid a host->device copy
    gumbel_noise = -torch.log(
        -torch.log(
            torch.rand(batch_size, sequence_length, vocab_size,
                       device=categorical_probs.device) + 1e-10
        ) + 1e-10
    )
    # add Gumbel noise to log probabilities; broadcasts over the batch dim
    noisy_scores = torch.log(categorical_probs) + gumbel_noise

    # select the highest score (most likely category after Gumbel noise)
    sampled_sequences = noisy_scores.argmax(dim=-1).to(dtype=torch.long)  # (batch_size, sequence_length)

    return sampled_sequences
67
+
68
def sample_batched_top_k(categorical_probs, batch_size, k):
    """
    Generates `batch_size` sequences sampled from the top-k noisy scores of
    each position, using Gumbel noise to add randomness and reduce bias
    towards the most likely options.

    Args:
        categorical_probs (torch.Tensor): tensor of shape
            (1, sequence_length, vocab_length) — a batch dim of 1 that is
            broadcast across the sampled batch — with per-position
            categorical probabilities.
        batch_size (int): number of sequences to sample.
        k (int): number of top candidates to consider per position.

    Returns:
        torch.Tensor: tensor of shape (batch_size, sequence_length) of
        sampled category indices (dtype long).
    """
    _, sequence_length, vocab_length = categorical_probs.shape

    # Add Gumbel noise to the log probabilities (noise built on-device)
    gumbel_noise = -torch.log(
        -torch.log(
            torch.rand(batch_size, sequence_length, vocab_length,
                       device=categorical_probs.device) + 1e-10
        ) + 1e-10
    )
    # BUGFIX: the original indexed `categorical_probs[None, :, :]`, adding a
    # 4th dimension to an already-3D tensor; the final torch.gather then
    # failed with a dimension mismatch (4-D input vs 3-D index). Plain
    # broadcasting over the existing batch dim is correct.
    noisy_scores = torch.log(categorical_probs) + gumbel_noise  # (batch_size, L, V)

    # Get the top-k categories based on noisy scores
    top_k_scores, top_k_indices = torch.topk(noisy_scores, k, dim=-1)  # (batch_size, L, k)

    # Convert top-k scores back to probabilities and normalize
    top_k_probs = torch.softmax(top_k_scores, dim=-1)  # (batch_size, L, k)

    # Sample randomly from the top-k probabilities
    sampled_indices_in_top_k = torch.multinomial(
        top_k_probs.reshape(-1, k), num_samples=1).squeeze(-1)
    sampled_indices_in_top_k = sampled_indices_in_top_k.view(
        batch_size, sequence_length)  # (batch_size, L)

    # Map sampled indices back to the original vocabulary indices
    sampled_sequences = torch.gather(
        top_k_indices, -1, sampled_indices_in_top_k.unsqueeze(-1)
    ).squeeze(-1).to(dtype=torch.long)

    return sampled_sequences
103
+
104
+ @dataclass
105
+ class Loss:
106
+ loss: torch.FloatTensor
107
+ nlls: torch.FloatTensor
108
+ attn_mask: torch.FloatTensor
109
+
110
+
111
+ class NLL(torchmetrics.aggregation.MeanMetric):
112
+ pass
113
+
114
+
115
+ class BPD(NLL):
116
+ def compute(self) -> Tensor:
117
+ """Computes the bits per dimension.
118
+
119
+ Returns:
120
+ bpd
121
+ """
122
+ return self.mean_value / self.weight / math.log(2)
123
+
124
+
125
+ class Perplexity(NLL):
126
+ def compute(self) -> Tensor:
127
+ """Computes the Perplexity.
128
+
129
+ Returns:
130
+ Perplexity
131
+ """
132
+ return torch.exp(self.mean_value / self.weight)
133
+
134
+
135
+ class Diffusion(L.LightningModule):
136
+ def __init__(
137
+ self,
138
+ config,
139
+ tokenizer = None,
140
+ mode="finetune",
141
+ device=None,
142
+ ):
143
+
144
+ super().__init__()
145
+ self.config = config
146
+ #self.save_hyperparameters()
147
+
148
+ # PeptideCLM tokenizer
149
+ if tokenizer is None:
150
+ self.tokenizer = SMILES_SPE_Tokenizer(f'{base_path}/tr2d2-pep/tokenizer/new_vocab.txt',
151
+ f'{base_path}/tr2d2-pep/tokenizer/new_splits.txt')
152
+ else:
153
+ self.tokenizer = tokenizer
154
+
155
+ self.vocab_size = self.tokenizer.vocab_size
156
+ self.mask_index = self.tokenizer.mask_token_id
157
+ self.sampler = self.config.sampling.predictor
158
+ self.analyzer = PeptideAnalyzer()
159
+
160
+ # backbone LM PeptideCLM model
161
+ self.backbone = roformer.Roformer(self.config, self.tokenizer, device=device)
162
+ if mode == "finetune":
163
+ self.backbone.freeze_model()
164
+ self.backbone.unfreeze_n_layers(n=8)
165
+ elif mode == "eval":
166
+ self.backbone.freeze_model()
167
+ self.backbone.requires_grad_(False)
168
+ self.backbone.eval()
169
+ elif mode == "train":
170
+ self.backbone.requires_grad_(True)
171
+ self.backbone.train()
172
+
173
+ self.neg_infinity = -1000000.0
174
+ self.T = config.T
175
+ # noise schedule for non-peptide bond tokens (default to log-linear)
176
+ self.noise = noise_schedule.get_noise(config)
177
+
178
+ # noise schedule for peptide bonds (log-polynomial)
179
+ self.bond_noise = noise_schedule.LogPolyNoise()
180
+ self.time_conditioning = self.config.time_conditioning
181
+ self.fast_forward_epochs = None
182
+ self.fast_forward_batches = None
183
+
184
+ self.gen_ppl_eval_model_name_or_path = self.config.eval.gen_ppl_eval_model_name_or_path
185
+ self.gen_ppl_metric = Perplexity()
186
+
187
+ self.lr = self.config.optim.lr
188
+ self.sampling_eps = self.config.training.sampling_eps
189
+
190
+ metrics = torchmetrics.MetricCollection({
191
+ 'nll': NLL(),
192
+ 'bpd': BPD(),
193
+ 'ppl': Perplexity(),
194
+ })
195
+ metrics.set_dtype(torch.float64)
196
+ self.train_metrics = metrics.clone(prefix='trainer/')
197
+ self.valid_metrics = metrics.clone(prefix='val/')
198
+ self.test_metrics = metrics.clone(prefix='test/')
199
+
200
+ ### FOR THE EXPANSION AND ROLLOUT STEP ###
201
+ def sample_finetuned_with_rnd(self, args, reward_model, pretrained, eps=1e-5):
202
+ num_steps = args.total_num_steps
203
+ B = args.batch_size
204
+ x_rollout = self.sample_prior(
205
+ B, args.seq_length).to(self.device)
206
+
207
+ log_rnd = torch.zeros(args.batch_size, device=self.device)
208
+
209
+ timesteps = torch.linspace(1, eps, num_steps + 1, device=self.device)
210
+ dt = (1 - eps) / num_steps
211
+
212
+ for i in range(num_steps):
213
+ t = timesteps[i] * torch.ones(x_rollout.shape[0], 1, device=self.device)
214
+
215
+ log_p, x_next, log_policy_step, log_pretrained_step = \
216
+ self.mcts_reverse_step(x_rollout, t=t, dt=dt, pretrained=pretrained)
217
+
218
+ log_rnd += log_pretrained_step - log_policy_step
219
+
220
+ x_rollout = x_next
221
+
222
+ # if mask token remains, fully unmask
223
+ mask_positions = (x_rollout == self.mask_index) # (B, L) bool
224
+
225
+ # does **any** mask remain in any sequence
226
+ any_mask_global = mask_positions.any().item() # true if mask remains
227
+ if any_mask_global:
228
+ log_p, x_next = self.single_noise_removal(x_rollout, t=t, dt=dt)
229
+
230
+ x_rollout = x_next
231
+
232
+ childSequences = self.tokenizer.batch_decode(x_rollout)
233
+
234
+ # change rewards for peptides
235
+ valid_x_final = []
236
+ validSequences = []
237
+ valid_log_rnd = []
238
+
239
+ for i in range(B):
240
+ # string sequence
241
+ childSeq = childSequences[i]
242
+
243
+ # check if the peptide is valid
244
+ if self.analyzer.is_peptide(childSeq):
245
+ valid_x_final.append(x_rollout[i])
246
+ validSequences.append(childSeq)
247
+ valid_log_rnd.append(log_rnd[i])
248
+
249
+ # compute multi-objective rewards
250
+ score_vectors = reward_model(input_seqs=validSequences)
251
+ scalar_rewards = np.sum(score_vectors, axis=-1)
252
+ scalar_rewards = torch.as_tensor(scalar_rewards, dtype=torch.float32, device=self.device)
253
+
254
+ print(f"scalar reward dim{len(scalar_rewards)}")
255
+ valid_log_rnd = torch.stack(valid_log_rnd, dim=0)
256
+
257
+ log_rnd = valid_log_rnd + (scalar_rewards / args.alpha) # scale down by alpha
258
+ valid_x_final = torch.stack(valid_x_final, dim=0)
259
+
260
+ return valid_x_final, log_rnd, scalar_rewards
261
+
262
+ def sample_finetuned(self, args, reward_model, batch_size=None, dataframe=False, eps=1e-5):
263
+ torch.cuda.empty_cache()
264
+ self.backbone.eval()
265
+ self.noise.eval()
266
+ print(f"device:{self.device}")
267
+
268
+ if batch_size is None:
269
+ batch_size = args.batch_size
270
+
271
+ num_steps = args.total_num_steps
272
+ x_rollout = self.sample_prior(
273
+ batch_size,
274
+ args.seq_length).to(self.device, dtype=torch.long)
275
+
276
+ timesteps = torch.linspace(1, eps, num_steps + 1, device=self.device)
277
+ dt = torch.tensor((1 - eps) / num_steps, device=self.device)
278
+
279
+ for i in range(num_steps):
280
+ t = timesteps[i] * torch.ones(x_rollout.shape[0], 1, device=self.device)
281
+
282
+ log_p, x_next = self.single_reverse_step(x_rollout, t=t, dt=dt)
283
+
284
+ x_rollout = x_next
285
+ x_rollout = x_rollout.to(self.device)
286
+
287
+ # if mask token remains, fully unmask
288
+ mask_positions = (x_rollout == self.mask_index) # (B, L) bool
289
+
290
+ # does **any** mask remain in any sequence
291
+ any_mask_global = mask_positions.any().item() # true if mask remains
292
+ if any_mask_global:
293
+ log_p, x_next = self.single_noise_removal(x_rollout, t=t, dt=dt)
294
+
295
+ x_rollout = x_next
296
+ x_rollout = x_rollout.to(self.device)
297
+
298
+ childSequences = self.tokenizer.batch_decode(x_rollout)
299
+ valid_x_final = []
300
+ validSequences = []
301
+
302
+ for idx, seq in enumerate(childSequences):
303
+ if self.analyzer.is_peptide(seq):
304
+ valid_x_final.append(x_rollout[idx])
305
+ validSequences.append(seq)
306
+
307
+ valid_fraction = len(validSequences) / batch_size
308
+
309
+ if (len(validSequences) != 0):
310
+ # add scores to log
311
+ result = reward_model(input_seqs=validSequences)
312
+
313
+ # Handle both TD3B (returns tuple) and base ScoringFunctions (returns array directly)
314
+ if isinstance(result, tuple):
315
+ # TD3BRewardFunction returns (total_rewards, info) tuple
316
+ # info contains 'score_vectors' which is (N, 2) array [affinities, total_rewards]
317
+ total_rewards, info = result
318
+ affinity = info['affinities']
319
+ # TD3B doesn't compute sol/hemo/nf/permeability, set to zeros
320
+ sol = np.zeros_like(affinity)
321
+ hemo = np.zeros_like(affinity)
322
+ nf = np.zeros_like(affinity)
323
+ permeability = np.zeros_like(affinity)
324
+ else:
325
+ # Base scoring functions return (N, num_objectives) array directly
326
+ score_vectors = np.asarray(result)
327
+ if score_vectors.ndim == 1:
328
+ score_vectors = score_vectors[:, None]
329
+ average_scores = score_vectors.T
330
+
331
+ affinity = average_scores[0] if average_scores.shape[0] > 0 else np.zeros((0,))
332
+ sol = average_scores[1] if average_scores.shape[0] > 1 else np.zeros_like(affinity)
333
+ hemo = average_scores[2] if average_scores.shape[0] > 2 else np.zeros_like(affinity)
334
+ nf = average_scores[3] if average_scores.shape[0] > 3 else np.zeros_like(affinity)
335
+ permeability = average_scores[4] if average_scores.shape[0] > 4 else np.zeros_like(affinity)
336
+
337
+ else:
338
+ zeros = [0.0]
339
+
340
+ affinity = zeros
341
+ sol = zeros
342
+ hemo = zeros
343
+ nf = zeros
344
+ permeability = zeros
345
+
346
+ if dataframe:
347
+ df = pd.DataFrame({
348
+ "Peptide Sequence": validSequences,
349
+ "Binding Affinity": affinity if len(validSequences) else [0.0],
350
+ "Solubility": sol if len(validSequences) else [0.0],
351
+ "Hemolysis": hemo if len(validSequences) else [0.0],
352
+ "Nonfouling": nf if len(validSequences) else [0.0],
353
+ "Permeability": permeability if len(validSequences) else [0.0],
354
+ })
355
+ return x_rollout, affinity, sol, hemo, nf, permeability, valid_fraction, df
356
+
357
+ return x_rollout, affinity, sol, hemo, nf, permeability, valid_fraction
358
+
359
+ def compute_log_policy(self, token_array, x_next, t, dt, attn_mask=None):
360
+ torch.cuda.empty_cache()
361
+ self.backbone.eval()
362
+ self.noise.eval()
363
+
364
+ sigma_t, _ = self.noise(t)
365
+
366
+ if token_array.ndim == 1:
367
+ token_array = token_array.unsqueeze(0)
368
+
369
+ if x_next.ndim == 1:
370
+ x_next = x_next.unsqueeze(0)
371
+
372
+ if t.ndim > 1:
373
+ t = t.squeeze(-1)
374
+ assert t.ndim == 1
375
+
376
+ change_prob_t = t[:, None, None]
377
+ change_prob_s = (t - dt)[:, None, None]
378
+
379
+ assert change_prob_t.ndim == 3, change_prob_t.shape
380
+
381
+ if attn_mask is None:
382
+ attn_mask = torch.ones_like(token_array).to(self.device)
383
+
384
+ log_p = self.forward(token_array, attn_mask=attn_mask, sigma=sigma_t)
385
+ p_x0 = log_p.exp()
386
+
387
+ assert change_prob_t.ndim == p_x0.ndim
388
+
389
+ q_xs = p_x0 * (change_prob_t - change_prob_s)
390
+
391
+ # zero-masking probability
392
+ q_xs[:, :, self.mask_index] = change_prob_s[:, :, 0]
393
+
394
+ copy_flag = (token_array != self.mask_index)
395
+
396
+ assert copy_flag.dtype == torch.bool, "copy_flag must be bool"
397
+ changed_mask = (~copy_flag)
398
+
399
+ # compute the per-sequence log-probability under the pretrained model
400
+ log_policy_token = log_p.gather(-1, x_next.unsqueeze(-1)).squeeze(-1)
401
+
402
+ unmasked_this_step = (changed_mask & (x_next != self.mask_index)).to(log_policy_token.dtype)
403
+ log_policy_step = (log_policy_token * unmasked_this_step).sum(dim=-1)
404
+
405
+ # returns:
406
+ # log_policy_step (B, ) log probability x_next tokens under policy
407
+ if log_policy_step.ndim == 1:
408
+ log_policy_step = log_policy_step.squeeze(0)
409
+
410
+ return log_policy_step
411
+
412
+
413
+ def single_reverse_step(self, token_array, t, dt, p_x0=None, attn_mask=None):
414
+ torch.cuda.empty_cache()
415
+ dev = self.device
416
+ self.backbone.to(dev).eval()
417
+ self.noise.eval()
418
+
419
+ t = t.to(dev)
420
+ dt = torch.as_tensor(dt, device=dev, dtype=t.dtype)
421
+ assert self.config.noise.type == 'loglinear'
422
+ sigma_t, _ = self.noise(t)
423
+ sigma_t = sigma_t.to(dev)
424
+
425
+ if t.ndim > 1:
426
+ t = t.squeeze(-1)
427
+ assert t.ndim == 1
428
+
429
+ change_prob_t = t[:, None, None]
430
+ change_prob_s = (t - dt)[:, None, None]
431
+
432
+ assert change_prob_t.ndim == 3, change_prob_t.shape
433
+
434
+ if attn_mask is None:
435
+ attn_mask = torch.ones_like(token_array, device=dev, dtype=torch.long)
436
+ else:
437
+ attn_mask = attn_mask.to(dev)
438
+
439
+ if p_x0 is None:
440
+ log_p = self.forward(token_array, attn_mask=attn_mask, sigma=sigma_t)
441
+ p_x0 = log_p.exp()
442
+ else:
443
+ # ensure provided p_x0 is on dev
444
+ log_p = None
445
+ p_x0 = p_x0.to(dev)
446
+
447
+ assert change_prob_t.ndim == p_x0.ndim
448
+
449
+ q_xs = p_x0 * (change_prob_t - change_prob_s)
450
+
451
+ # zero-masking probability
452
+ q_xs[:, :, self.mask_index] = change_prob_s[:, :, 0]
453
+
454
+ x_changed = _sample_categorical(q_xs)
455
+ if x_changed.device != dev or x_changed.dtype != token_array.dtype:
456
+ x_changed = x_changed.to(dev, dtype=token_array.dtype)
457
+
458
+ copy_flag = (token_array != self.mask_index)
459
+
460
+ int_copy_flag = copy_flag.to(token_array.dtype)
461
+ x_next = int_copy_flag * token_array + (1 - int_copy_flag) * x_changed
462
+
463
+ # returns:
464
+ # log_p (B, L, D) log probabilties of each token under the policy model
465
+ # x_next (B, L) next sequences
466
+ return log_p, x_next
467
+
468
+
469
+ def single_noise_removal(self, token_array, t, dt, p_x0=None, attn_mask=None):
470
+ torch.cuda.empty_cache()
471
+ self.backbone.eval()
472
+ self.noise.eval()
473
+
474
+ assert self.config.noise.type == 'loglinear'
475
+ sigma_t, _ = self.noise(t)
476
+
477
+ if t.ndim > 1:
478
+ t = t.squeeze(-1)
479
+ assert t.ndim == 1
480
+
481
+ change_prob_t = t[:, None, None]
482
+ change_prob_s = (t - dt)[:, None, None]
483
+
484
+ assert change_prob_t.ndim == 3, change_prob_t.shape
485
+
486
+ if attn_mask is None:
487
+ attn_mask = torch.ones_like(token_array).to(self.device)
488
+
489
+ if p_x0 is None:
490
+ log_p = self.forward(token_array, attn_mask=attn_mask, sigma=sigma_t)
491
+ p_x0 = log_p.exp()
492
+
493
+ assert change_prob_t.ndim == p_x0.ndim
494
+
495
+ # changed for noise removal
496
+ p_x0 = p_x0.clone()
497
+ p_x0[:, :, self.mask_index] = 0.0 # prevent remaining a mask
498
+ p_x0 = p_x0 / p_x0.sum(dim=-1, keepdim=True).clamp_min(1e-12) # renorm over non-MASK
499
+ q_xs = p_x0 * (change_prob_t - change_prob_s)
500
+
501
+ x_changed = _sample_categorical(q_xs)
502
+
503
+ copy_flag = (token_array != self.mask_index)
504
+
505
+ int_copy_flag = copy_flag.to(token_array.dtype)
506
+ x_next = int_copy_flag * token_array + (1 - int_copy_flag) * x_changed
507
+
508
+ # returns:
509
+ # log_p (B, L, D) log probabilties of each token under the policy model
510
+ # x_next (B, L) next sequences
511
+ return log_p, x_next
512
+
513
+ def mcts_reverse_step(self, token_array, t, dt, pretrained, p_x0=None, attn_mask=None):
514
+ torch.cuda.empty_cache()
515
+ self.backbone.eval()
516
+ self.noise.eval()
517
+ assert self.config.noise.type == 'loglinear'
518
+ sigma_t, _ = self.noise(t)
519
+
520
+ if t.ndim > 1:
521
+ t = t.squeeze(-1)
522
+ assert t.ndim == 1
523
+
524
+ change_prob_t = t[:, None, None]
525
+ change_prob_s = (t - dt)[:, None, None]
526
+
527
+ assert change_prob_t.ndim == 3, change_prob_t.shape
528
+
529
+ if attn_mask is None:
530
+ attn_mask = torch.ones_like(token_array).to(self.device)
531
+
532
+ if p_x0 is None:
533
+ log_p = self.forward(token_array, attn_mask=attn_mask, sigma=sigma_t)
534
+ p_x0 = log_p.exp()
535
+
536
+ assert change_prob_t.ndim == p_x0.ndim
537
+
538
+ q_xs = p_x0 * (change_prob_t - change_prob_s)
539
+
540
+ # zero-masking probability
541
+ q_xs[:, :, self.mask_index] = change_prob_s[:, :, 0]
542
+
543
+ x_changed = _sample_categorical(q_xs)
544
+
545
+ copy_flag = (token_array != self.mask_index)
546
+
547
+ int_copy_flag = copy_flag.to(token_array.dtype)
548
+ x_next = int_copy_flag * token_array + (1 - int_copy_flag) * x_changed
549
+
550
+ # compute the log-probability under pretrained model at each step
551
+ with torch.no_grad():
552
+ # pretrained should output log-probs over vocab at each position given the *parent* (masked) input
553
+ log_pre = pretrained.forward(token_array, attn_mask=attn_mask, sigma=sigma_t)
554
+
555
+ # log-prob of the *sampled token* at each position
556
+ log_pre_token = log_pre.gather(-1, x_next.unsqueeze(-1)).squeeze(-1) # [B*batch,L]
557
+
558
+ # sum only over the sites actually sampled this step (i.e., where parent was mask)
559
+
560
+ assert copy_flag.dtype == torch.bool, "copy_flag must be bool"
561
+ changed_mask = (~copy_flag)
562
+ # mask of tokens that were unmasked in this step
563
+ unmasked_this_step = (changed_mask & (x_next != self.mask_index)).to(log_pre_token.dtype)
564
+
565
+ log_pretrained_step = (log_pre_token * unmasked_this_step).sum(dim=-1)
566
+
567
+ # compute the per-sequence log-probability under the pretrained model
568
+ log_policy_token = log_p.gather(-1, x_next.unsqueeze(-1)).squeeze(-1) # [B*batch,L]
569
+ log_policy_step = (log_policy_token * unmasked_this_step).sum(dim=-1)
570
+
571
+ # returns:
572
+ # log_p (B, L, D) log probabilties of each token under the policy model
573
+ # x_next (B, L) next sequences
574
+ # log_policy_step (B, ) log probability of all unmasked tokens under policy
575
+ # log_pretrained_step (B, ) log probabiltiy of all unmasked tokens under pretrained model
576
+ return log_p, x_next, log_policy_step, log_pretrained_step
577
+
578
def mcts_noise_removal(self, token_array, t, dt, pretrained, p_x0=None, attn_mask=None):
    """
    Final MCTS denoising step: force every remaining MASK token to resolve to a
    real token (the MASK probability is zeroed and the distribution renormalized).

    Args:
        token_array: (B, L) current token ids (may contain MASK tokens).
        t: current timestep tensor; squeezed to 1-D.
        dt: scalar step size.
        pretrained: frozen reference model scored on the same transition.
        p_x0: optional cached (B, L, V) denoiser probabilities for token_array.
        attn_mask: optional attention mask; defaults to all ones.

    Returns:
        log_p: (B, L, V) log-probabilities of each token under the policy model.
        x_next: (B, L) next sequences.
        log_policy_step: (B,) log-prob of this step's unmasked tokens under the policy.
        log_pretrained_step: (B,) same quantity under the pretrained model.
    """
    torch.cuda.empty_cache()
    self.backbone.eval()
    self.noise.eval()

    # the closed-form probabilities below assume the log-linear schedule (alpha = 1 - t)
    assert self.config.noise.type == 'loglinear'
    sigma_t, _ = self.noise(t)

    if t.ndim > 1:
        t = t.squeeze(-1)
    assert t.ndim == 1

    # unmasking probabilities at times t and s = t - dt
    change_prob_t = t[:, None, None]
    change_prob_s = (t - dt)[:, None, None]
    assert change_prob_t.ndim == 3, change_prob_t.shape

    if attn_mask is None:
        attn_mask = torch.ones_like(token_array).to(self.device)

    if p_x0 is None:
        log_p = self.forward(token_array, attn_mask=attn_mask, sigma=sigma_t)
        p_x0 = log_p.exp()
    else:
        # BUG FIX: log_p was previously undefined whenever a cached p_x0 was
        # passed in, raising a NameError at the log_p.gather(...) call below.
        # Recover log-probs from the cached probabilities (clamped to avoid -inf*0).
        log_p = p_x0.clamp_min(1e-12).log()

    assert change_prob_t.ndim == p_x0.ndim

    # noise removal: zero the MASK probability and renormalize so every masked
    # position must resolve to a real token this step
    p_x0 = p_x0.clone()
    p_x0[:, :, self.mask_index] = 0.0  # prevent remaining a mask
    p_x0 = p_x0 / p_x0.sum(dim=-1, keepdim=True).clamp_min(1e-12)  # renorm over non-MASK
    q_xs = p_x0 * (change_prob_t - change_prob_s)

    x_changed = _sample_categorical(q_xs)

    # carry-over: positions already unmasked keep their token
    copy_flag = (token_array != self.mask_index)
    int_copy_flag = copy_flag.to(token_array.dtype)
    x_next = int_copy_flag * token_array + (1 - int_copy_flag) * x_changed

    # score the same transition under the frozen pretrained model
    with torch.no_grad():
        # pretrained outputs log-probs over the vocab given the *parent* (masked) input
        log_pre = pretrained.forward(token_array, attn_mask=attn_mask, sigma=sigma_t)

    # log-prob of the *sampled token* at each position
    log_pre_token = log_pre.gather(-1, x_next.unsqueeze(-1)).squeeze(-1)  # (B, L)

    # sum only over the sites actually sampled this step (parent was MASK,
    # child is a real token)
    assert copy_flag.dtype == torch.bool, "copy_flag must be bool"
    changed_mask = (~copy_flag)
    unmasked_this_step = (changed_mask & (x_next != self.mask_index)).to(log_pre_token.dtype)

    log_pretrained_step = (log_pre_token * unmasked_this_step).sum(dim=-1)

    # same per-sequence quantity under the policy model
    log_policy_token = log_p.gather(-1, x_next.unsqueeze(-1)).squeeze(-1)  # (B, L)
    log_policy_step = (log_policy_token * unmasked_this_step).sum(dim=-1)

    return log_p, x_next, log_policy_step, log_pretrained_step
644
+
645
# first step in expansion
def batch_mcts_reverse_step(self, token_array, t, dt, batch_size, pretrained, p_x0=None, attn_mask=None):
    """
    First expansion step of MCTS: sample batch_size distinct children from one parent.

    Args:
        token_array: (L,) or (1, L) parent token ids (may contain MASK tokens).
        t: current timestep tensor; squeezed to 1-D.
        dt: scalar step size.
        batch_size: number of child sequences to generate.
        pretrained: frozen reference model scored on the same transition.
        p_x0: optional cached (1, L, V) denoiser probabilities for the parent.
        attn_mask: optional attention mask; defaults to all ones.

    Returns:
        log_p: (B, L, V) policy log-probabilities (repeated per child).
        x_children: (B, L) child sequences.
        log_policy_step: (B,) log-prob of this step's unmasked tokens under the policy.
        log_pretrained_step: (B,) same quantity under the pretrained model.

    Raises:
        ValueError: if token_array contains token ids outside [0, vocab_size).
    """
    torch.cuda.empty_cache()
    self.backbone.eval()
    self.noise.eval()

    # the closed-form probabilities below assume the log-linear schedule (alpha = 1 - t)
    assert self.config.noise.type == 'loglinear'
    sigma_t, _ = self.noise(t)

    if t.ndim > 1:
        t = t.squeeze(-1)
    assert t.ndim == 1

    # unmasking probabilities at times t and s = t - dt
    change_prob_t = t[:, None, None]
    change_prob_s = (t - dt)[:, None, None]
    assert change_prob_t.ndim == 3, change_prob_t.shape

    if token_array.dim() == 1:
        token_array = token_array.unsqueeze(0)

    if attn_mask is None:
        attn_mask = torch.ones_like(token_array).to(self.device)

    token_array = token_array.to(self.device)
    sigma_t = sigma_t.to(self.device)

    # ====== INPUT VALIDATION for batch_mcts_reverse_step ======
    token_min = token_array.min().item()
    token_max = token_array.max().item()
    if token_min < 0 or token_max >= self.vocab_size:
        raise ValueError(
            f"batch_mcts_reverse_step: Invalid token IDs in token_array: "
            f"min={token_min}, max={token_max}, vocab_size={self.vocab_size}"
        )

    if p_x0 is None:
        log_p = self.forward(token_array, attn_mask=attn_mask, sigma=sigma_t)
        p_x0 = log_p.exp()
    else:
        # BUG FIX: log_p was previously undefined whenever a cached p_x0 was
        # passed in, raising a NameError at the log_p.repeat(...) call below.
        # Recover log-probs from the cached probabilities (clamped to avoid -inf*0).
        log_p = p_x0.clamp_min(1e-12).log()

    assert change_prob_t.ndim == p_x0.ndim

    q_xs = p_x0 * (change_prob_t - change_prob_s)
    # probability of remaining masked at time s
    q_xs[:, :, self.mask_index] = change_prob_s[:, :, 0]

    # repeat the parent so each copy can be unmasked into a distinct child
    token_array_expanded = token_array.repeat(batch_size, 1)

    if self.config.mcts.sampling == 0:
        x_changed = sample_batched_categorical(q_xs.to(self.device), batch_size)
    else:
        x_changed = sample_batched_top_k(q_xs.to(self.device), batch_size, self.config.mcts.sampling)

    # carry-over: already-unmasked positions keep the parent token
    copy_flag = (token_array_expanded != self.mask_index)
    int_copy_flag = copy_flag.to(token_array.dtype)
    x_children = int_copy_flag * token_array_expanded + (1 - int_copy_flag) * x_changed

    # score the same transition under the frozen pretrained model
    with torch.no_grad():
        # pretrained outputs log-probs over the vocab given the *parent* (masked) input
        log_pre = pretrained.forward(token_array, attn_mask=attn_mask, sigma=sigma_t)
        # expand to match the shape of x_children
        log_pre = log_pre.repeat(batch_size, 1, 1)

    # log-prob of the *sampled token* at each position
    log_pre_token = log_pre.gather(-1, x_children.unsqueeze(-1)).squeeze(-1)  # (B, L)

    # sum only over the sites actually sampled this step (parent was MASK,
    # child is a real token)
    assert copy_flag.dtype == torch.bool, "copy_flag must be bool"
    changed_mask = (~copy_flag)
    unmasked_this_step = (changed_mask & (x_children != self.mask_index)).to(log_pre_token.dtype)

    log_pretrained_step = (log_pre_token * unmasked_this_step).sum(dim=-1)

    # per-child log-probability under the policy model
    log_p = log_p.repeat(batch_size, 1, 1)
    log_policy_token = log_p.gather(-1, x_children.unsqueeze(-1)).squeeze(-1)  # (B, L)
    log_policy_step = (log_policy_token * unmasked_this_step).sum(dim=-1)

    return log_p, x_children, log_policy_step, log_pretrained_step
739
+
740
+
741
def compute_invalid_loss(self, logits, k=None, temp=None):
    """
    Penalize logits whose greedy decoding fails the `is_peptide` validity check.

    The penalty for an invalid sequence is scaled, per position, by the softmax
    probability of the greedily chosen token, so confidently wrong predictions
    are punished harder. Valid sequences incur zero loss.

    Args:
        logits: Tensor of shape (batch_size, seq_len, vocab_size).
        k: unused; kept for interface compatibility (Gumbel-Rao sampling).
        temp: unused; kept for interface compatibility.

    Returns:
        Tensor of shape (batch_size, seq_len) with the scaled penalties.
    """
    # greedy decode: most likely token at every position
    token_ids = logits.argmax(dim=-1).to(self.device)
    decoded = self.tokenizer.batch_decode(token_ids)

    # 1.0 for each invalid sequence, 0.0 for each valid one (not differentiable)
    invalid_flags = [0.0 if self.analyzer.is_peptide(seq) else 1.0 for seq in decoded]
    penalties = torch.tensor(invalid_flags, dtype=torch.float32, device=self.device)

    # probability the model assigned to each greedily chosen token (batch, seq_len)
    probs = torch.softmax(logits, dim=-1)
    chosen_probs = probs.gather(dim=-1, index=token_ids.unsqueeze(-1)).squeeze(-1).to(self.device)

    # broadcast the per-sequence flag across positions
    return (penalties[:, None] * chosen_probs).to(self.device)
776
+
777
+ ### DIFFUSION LOSS ###
778
+
779
def sample_t(self, n, device):
    """
    Draw n diffusion timesteps for batch training, mapped into [sampling_eps, 1).

    With antithetic sampling enabled, the draws are stratified into n evenly
    spaced bins of width 1/n, reducing the variance of the loss estimate.
    """
    # uniform draws in [0, 1)
    t_raw = torch.rand(n, device=device)

    if self.config.training.antithetic_sampling:
        # stratify: place exactly one sample inside each bin [i/n, (i+1)/n)
        bin_offsets = torch.arange(n, device=device) / n
        t_raw = (t_raw / n + bin_offsets) % 1

    # affine map into [eps, 1) so t is never exactly 0 or 1
    eps = self.config.training.sampling_eps
    return (1 - eps) * t_raw + eps
796
+
797
+ """def mask_samples(self, x0, mask_prob):
798
+
799
+ # generate array of values in range [0, 1] uniformly at random
800
+ # will be used to determine which tokens are masked
801
+ mask_indices = torch.rand(* x0.shape, device=x0.device) # (batch_size, L)
802
+
803
+ # select tokens to mask if the random value in mask_indices is less than mask_prob
804
+ # this will mask approximately the fraction of tokens indicated by mask_prob
805
+ zt = torch.where(mask_indices < mask_prob, self.mask_index, x0)
806
+
807
+ return zt"""
808
+
809
def q_xt(self, x, mask_prob):
    """
    Compute the noised sample xt by masking tokens of x, capping the number of
    masked positions at 75% of each sequence's non-pad length.

    Args:
        x: int tensor (batch_size, seq_len) of token ids; id 0 is treated as
            padding when measuring sequence length.
        mask_prob: float tensor broadcastable to x's shape with per-token
            masking probabilities.

    Returns:
        Tensor like x with the selected positions replaced by the MASK token id.
    """
    # non-pad length per sequence; the masking budget is 75% of it
    seq_lengths = (x != 0).sum(dim=-1, keepdim=True)
    mask_budget = (seq_lengths * 0.75).long()

    # independent Bernoulli draw per position
    candidate = torch.rand(*x.shape, device=x.device) < mask_prob

    keep = torch.zeros_like(candidate, dtype=torch.bool)
    for row in range(x.shape[0]):
        hit_positions = torch.where(candidate[row])[0]
        if len(hit_positions) > mask_budget[row]:
            # over budget: keep only the earliest candidate positions
            keep[row, hit_positions[:mask_budget[row].item()]] = True
        else:
            keep[row] = candidate[row]

    return torch.where(keep, self.tokenizer.mask_token_id, x)
838
+
839
+
840
def sample_prior(self, *batch_dims):
    """
    Return the diffusion prior: an int64 tensor of the given shape with every
    position set to the MASK token index.
    """
    return self.mask_index * torch.ones(*batch_dims, dtype=torch.int64)
845
+
846
+
847
+ ### COMPUTING LOSS ###
848
+
849
def compute_diffusion_loss(self, model_output, xt, x0, t):
    """
    Computes the diffusion loss term of the ELBO for the discrete-time case
    (how accurately the model predicts token probabilities at each timestep).

    Args:
        model_output: (batch, seq_len, vocab_size) log-probabilities per position.
        xt: corrupted (partially masked) version of x0 at timestep t.
        x0: original input sequence, (batch, seq_len) int tensor.
        t: timestep; broadcast against x0 below — assumed (batch, 1) or scalar,
            TODO confirm shape at call sites.

    Returns:
        (batch, seq_len) tensor of per-token variational-bound terms, scaled by T.
        Only positions masked in xt contribute.
    """
    # interval between adjacent discrete timesteps
    dt = 1 / self.T

    # vectorized alpha terms at times t and s = t - dt
    # (log-linear schedule: alpha = 1 - t); + zeros_like broadcasts to x0's shape
    alpha_t = 1 - t + torch.zeros_like(x0)
    alpha_s = 1 - (t - dt) + torch.zeros_like(x0)

    # log <x_theta, x0>: log-prob assigned to the true token, shape (B, L, 1)
    log_x_theta_at_x0 = torch.gather(model_output, -1, x0[:, :, None])
    # log <x_theta, m>: log-prob of the MASK token at each position, (B, L)
    log_x_theta_at_m = model_output[:, :, self.mask_index]
    # <x_theta, m> in probability space
    x_theta_at_m = log_x_theta_at_m.exp()

    # first term of the diffusion loss
    term_1_coef = dt / t
    term_1_log_numerator = torch.log((alpha_t * x_theta_at_m) / t + 1)
    term_1_log_denom = log_x_theta_at_x0

    # second term of the diffusion loss
    term_2_coef = 1 - (dt / t)
    term_2_log_numerator = term_1_log_numerator
    term_2_log_denom = torch.log((alpha_s * x_theta_at_m) / (t - dt) + 1)

    L_vb_masked = (term_1_coef * (term_1_log_numerator - term_1_log_denom) +
                   term_2_coef * (term_2_log_numerator - term_2_log_denom))

    # multiply by the <xt, m> indicator: only masked positions contribute
    L_vb = L_vb_masked * (xt == self.mask_index)

    # scale by the number of timesteps T
    return self.T * L_vb
896
+
897
def _forward_pass_diffusion(self, x0, attn_mask, bond_mask=None, mask=None):
    """
    One training forward pass: corrupt x0 at a random timestep and score the
    model's reconstruction.

    Args:
        x0: (batch, seq_len) clean token ids.
        attn_mask: (batch, seq_len) attention mask.
        bond_mask: optional (batch, seq_len) indicator of positions that use the
            state-dependent (log-polynomial) noise schedule; 1 marks those
            positions — presumably peptide-bond tokens, per the comments below.
        mask: optional explicit corruption mask; where mask == 1 the token is
            kept, elsewhere it is replaced by MASK (overrides random masking).

    Returns:
        (batch, seq_len) per-token loss: the discrete-time diffusion loss when
        self.T > 0, otherwise the schedule-weighted NLL plus the
        invalid-sequence penalty.
    """
    # randomly sample a timestep for each sequence in the batch
    t = self.sample_t(x0.shape[0], self.device)

    # discrete-time case: snap t onto the grid {1/T, ..., 1}
    if self.T > 0:
        # scale by total timesteps T and cast to integer
        t = (t * self.T).to(torch.int)
        # scale down by T to get a multiple of 1/T
        t = t / self.T
        # add 1/T to ensure no 0 values
        t += (1 / self.T)

    # noise level and its rate at timestep t
    # log-linear schedule: sigma = -log(1-t); dsigma = 1 / (1-t)
    sigma, dsigma = self.noise(t)
    time_conditioning = sigma[:, None]

    # per-token masking probability: 1 - alpha = 1 - exp(-sigma) (= t for log-linear)
    base_mask_prob = 1 - torch.exp(-sigma[:, None])  # (batch_size, 1)

    if self.config.noise.state_dependent and (bond_mask is not None):
        # log-polynomial masking schedule for bond positions: alpha = 1 - t^w
        # bond_sigma = -log(1-t^w) for w = 3 (default)
        # bond_dsigma = -w t^(w-1) / (1-t^w)
        bond_sigma, bond_dsigma = self.bond_noise(t)
        # expand dimensions for broadcasting to (B, L)
        bond_sigma = bond_sigma[:, None]
        bond_dsigma = bond_dsigma[:, None]
        sigma = sigma[:, None]
        dsigma = dsigma[:, None]

        # masking probability for bond positions: 1 - bond_alpha = t^w
        bond_mask_prob = 1 - torch.exp(-bond_sigma).to(self.device)
        # per-position schedule: bond positions use the bond schedule,
        # everything else the base schedule
        mask_prob = torch.where(bond_mask == 1, bond_mask_prob, base_mask_prob).to(self.device)
        dsigma = torch.where(bond_mask == 1, bond_dsigma, dsigma).to(self.device)
        sigma = torch.where(bond_mask == 1, bond_sigma, sigma).to(self.device)
    else:
        mask_prob = base_mask_prob.to(self.device)

    # corrupt x0: random masking, or the explicit mask when provided
    if mask is None:
        zt = self.q_xt(x0, mask_prob).to(self.device)
    else:
        zt = x0.where(mask==1, torch.full_like(x0, self.mask_index)).to(self.device)

    model_output = self.forward(zt, attn_mask=attn_mask.to(self.device), sigma=time_conditioning).to(self.device)

    # debugging
    assert not torch.isnan(model_output).any()
    assert model_output.is_cuda
    utils.print_nans(model_output, 'model_output')

    # penalty for decodings that fail the peptide-validity check
    invalid_loss = self.compute_invalid_loss(logits=model_output).to(self.device)  # (B, L)

    if self.T > 0:
        # discrete-time diffusion loss (note: invalid_loss is not applied here)
        diffusion_loss = self.compute_diffusion_loss(model_output, zt, x0, t)
        return diffusion_loss

    # continuous-time case: -log p_theta(x0) at every position, (B, L)
    log_p_theta = torch.gather(input=model_output, dim=-1, index=x0[:, :, None]).squeeze(-1).to(self.device)

    if self.config.noise.state_dependent and (bond_mask is not None):
        # dsigma/sigma were already expanded to (B, L) in the branch above
        return (-log_p_theta * (dsigma / torch.expm1(sigma)) + invalid_loss).to(self.device)
    else:
        return ((-log_p_theta * (dsigma / torch.expm1(sigma))[:, None]) + invalid_loss).to(self.device)
976
+
977
def _loss(self, x0, attn_mask, bond_mask=None, mask=None):
    """
    Run the diffusion forward pass and reduce the per-token NLL to a scalar.

    Returns:
        Loss tuple with the mean per-token NLL, the per-token NLL tensor, and
        the attention mask used for the reduction.
    """
    per_token = self._forward_pass_diffusion(x0, attn_mask, bond_mask, mask)

    # mask out padding before reduction
    nlls = per_token * attn_mask
    token_count = attn_mask.sum()

    # batch-level and per-token negative log-likelihood
    mean_nll = nlls.sum() / token_count

    return Loss(loss=mean_nll.to(self.device),
                nlls=nlls.to(self.device),
                attn_mask=attn_mask.to(self.device))
992
+
993
def _compute_loss(self, batch, prefix, bond_mask=None):
    """
    Compute the loss for one batch and update the split-specific metrics.

    Args:
        batch: dict with 'input_ids', 'attention_mask', and optionally
            'mask' and 'bond_mask' tensors.
        prefix: one of 'train', 'val', 'test' — selects the metric collection.
        bond_mask: optional fallback bond mask, used only when the batch does
            not carry its own 'bond_mask'.

    Returns:
        Scalar mean per-token loss.

    Raises:
        ValueError: for an unknown prefix.
    """
    attn_mask = batch['attention_mask'].to(self.device)

    mask = batch['mask'].to(self.device) if 'mask' in batch else None

    # BUG FIX: the bond_mask argument was previously overwritten unconditionally
    # (reset to None when the batch lacked 'bond_mask'), so callers passing
    # bond_mask explicitly had it silently ignored. The batch value still takes
    # precedence; the argument now serves as a fallback.
    if 'bond_mask' in batch:
        bond_mask = batch['bond_mask'].to(self.device)
    elif bond_mask is not None:
        bond_mask = bond_mask.to(self.device)

    losses = self._loss(batch['input_ids'].to(self.device), attn_mask, bond_mask, mask)
    loss = losses.loss

    if prefix == 'train':
        self.train_metrics.update(
            losses.nlls.to(self.device),
            losses.attn_mask.to(self.device)
        )
        metrics = self.train_metrics
    elif prefix == 'val':
        self.valid_metrics.update(
            losses.nlls.to(self.device),
            losses.attn_mask.to(self.device)
        )
        metrics = self.valid_metrics
    elif prefix == 'test':
        self.test_metrics.update(losses.nlls, losses.attn_mask)
        metrics = self.test_metrics
    else:
        raise ValueError(f'Invalid prefix: {prefix}')

    self.log_dict(metrics,
                  on_step=False,
                  on_epoch=True,
                  sync_dist=True)

    return loss
1034
+
1035
+
1036
+ ### SAMPLING ###
1037
+
1038
def generate_from_masked(self, num_samples=None, seq_length=None, sample_steps=128, eps=1e-5):
    """
    Generate sequences by reverse diffusion starting from fully masked inputs.

    Args:
        num_samples: number of sequences to generate. NOTE(review): there is no
            config fallback here — a None value reaches sample_prior unchanged
            and would fail; callers must supply it.
        seq_length: sequence length; defaults to config.sampling.seq_length.
        sample_steps: number of reverse steps; defaults to config.sampling.steps.
        eps: smallest timestep, keeps t away from exactly 0.

    Returns:
        (num_samples, seq_length) tensor of generated token ids.
    """
    # get number of timesteps
    if sample_steps is None:
        sample_steps = self.config.sampling.steps

    if seq_length is None:
        seq_length = self.config.sampling.seq_length

    # start from the fully masked prior
    z = self.sample_prior(num_samples, seq_length).to(self.device)

    # decreasing timesteps from 1 down to eps
    timesteps = torch.linspace(1, eps, sample_steps + 1, device=self.device)

    # interval between consecutive timesteps
    dt = (1 - eps) / sample_steps

    for i in range(sample_steps):
        t = timesteps[i] * torch.ones(z.shape[0], 1, device=self.device)

        # NOTE(review): single_reverse_step is commented out in this file —
        # confirm it is defined elsewhere before calling this method.
        z = self.single_reverse_step(z, t, dt)

    return z
1061
+
1062
+
1063
+ ### SAMPLING STEP ###
1064
+ """
1065
+ def single_reverse_step(self, zt, t, dt, attn_mask=None):
1066
+ # get sigma values that determine masking prob
1067
+ sigma_t, _ = self.noise(t)
1068
+ sigma_s, _ = self.noise(t - dt)
1069
+
1070
+ # reshape sigmas
1071
+ if sigma_t.ndim > 1:
1072
+ sigma_t = sigma_t.squeeze(-1)
1073
+ if sigma_s.ndim > 1:
1074
+ sigma_s = sigma_s.squeeze(-1)
1075
+ assert sigma_t.ndim == 1, sigma_t.shape
1076
+ assert sigma_s.ndim == 1, sigma_s.shape
1077
+
1078
+ # compute masking probabilities for each timestep
1079
+ change_prob_t = 1 - torch.exp(-sigma_t)
1080
+ change_prob_s = 1 - torch.exp(-sigma_s)
1081
+
1082
+ # expand dimensions
1083
+ change_prob_t = change_prob_t[:, None, None]
1084
+ change_prob_s = change_prob_s[:, None, None]
1085
+
1086
+ # get prodiction model that outputs token probabilities
1087
+ log_p_x0 = self.forward(zt, attn_mask=attn_mask, sigma=sigma_t)
1088
+
1089
+ # check dimensions match
1090
+ assert change_prob_t.ndim == log_p_x0.ndim
1091
+
1092
+ # compute reverse diffusion probability of being unmasked at timestep s
1093
+ # (sigma_s - sigma_t)*x_theta
1094
+ q_zs = log_p_x0.exp() * (change_prob_t - change_prob_s)
1095
+
1096
+ # compute reverse diffusion probability of remaining masked at timestep s
1097
+ # (1 - sigma_s)*m
1098
+ q_zs[:, :, self.mask_index] = change_prob_s[:, :, 0]
1099
+
1100
+ # sample sequence at timestep s from categorical distribution of q_zs
1101
+ z_changed = _sample_categorical(q_zs)
1102
+
1103
+ copy_flag = (zt != self.mask_index).to(zt.dtype)
1104
+ return (copy_flag * zt) + ((1 - copy_flag) * z_changed)"""
1105
+
1106
def cached_reverse_step(self, x, t, dt, p_x0=None, attn_mask=None):
    """
    One reverse-diffusion step from time t to s = t - dt, reusing a cached
    denoiser output p_x0 when the caller provides one.

    Args:
        x: (batch, seq_len) current token ids (may contain MASK tokens).
        t: current timesteps; squeezed to 1-D.
        dt: scalar step size.
        p_x0: optional cached (batch, seq_len, vocab) denoiser probabilities.
        attn_mask: optional attention mask for the forward pass.

    Returns:
        (p_x0, x_next): the (possibly newly computed) denoiser probabilities and
        the partially unmasked next sequence.
    """
    # the closed-form probabilities below assume the log-linear schedule (alpha = 1 - t)
    assert self.config.noise.type == 'loglinear'
    sigma_t, _ = self.noise(t)

    if t.ndim > 1:
        t = t.squeeze(-1)
    assert t.ndim == 1

    # unmasking probabilities at times t and s = t - dt
    change_prob_t = t[:, None, None]
    change_prob_s = (t - dt)[:, None, None]

    assert change_prob_t.ndim == 3, change_prob_t.shape

    if p_x0 is None:
        p_x0 = self.forward(x, attn_mask=attn_mask, sigma=sigma_t).exp()

    assert change_prob_t.ndim == p_x0.ndim

    # reverse kernel: unmask into token y with probability p_x0(y) * (t - s) ...
    q_xs = p_x0 * (change_prob_t - change_prob_s)

    # ... and remain masked with probability s
    q_xs[:, :, self.mask_index] = change_prob_s[:, :, 0]

    x_changed = _sample_categorical(q_xs)

    # carry-over: positions already unmasked keep their token
    copy_flag = (x != self.mask_index).to(x.dtype)

    return p_x0, copy_flag * x + (1 - copy_flag) * x_changed
1134
+
1135
# first step in expansion
def batch_cached_reverse_step(self, token_array, t, dt, batch_size, p_x0=None, attn_mask=None):
    """
    Generate batch_size different samples from the same starting point for the
    first expansion step of MCTS.

    Args:
        token_array: (L,) or (1, L) parent token ids.
        t: current timestep tensor; squeezed to 1-D.
        dt: scalar step size.
        batch_size: number of children to sample from the parent.
        p_x0: optional cached (1, L, vocab) denoiser probabilities.
        attn_mask: optional attention mask for the forward pass; defaults to
            all ones.

    Returns:
        (p_x0, x_children): the denoiser probabilities for the parent and the
        (batch_size, L) tensor of child sequences.
    """
    # the closed-form probabilities below assume the log-linear schedule (alpha = 1 - t)
    assert self.config.noise.type == 'loglinear'
    sigma_t, _ = self.noise(t)

    if t.ndim > 1:
        t = t.squeeze(-1)
    assert t.ndim == 1

    # unmasking probabilities at times t and s = t - dt
    change_prob_t = t[:, None, None]
    change_prob_s = (t - dt)[:, None, None]

    assert change_prob_t.ndim == 3, change_prob_t.shape

    if token_array.dim() == 1:
        token_array = token_array.unsqueeze(0)

    # BUG FIX: the attn_mask argument was previously overwritten unconditionally,
    # so a caller-provided mask was ignored; now it is respected (consistent
    # with batch_mcts_reverse_step).
    if attn_mask is None:
        attn_mask = torch.ones_like(token_array).to(self.device)

    if p_x0 is None:
        p_x0 = self.forward(token_array, attn_mask=attn_mask, sigma=sigma_t).exp()

    assert change_prob_t.ndim == p_x0.ndim

    q_xs = p_x0 * (change_prob_t - change_prob_s)

    # probability of remaining masked at time s
    q_xs[:, :, self.mask_index] = change_prob_s[:, :, 0]

    # repeat the parent so each copy can be unmasked into a distinct child
    token_array = token_array.repeat(batch_size, 1)

    if self.config.mcts.sampling == 0:
        x_changed = sample_batched_categorical(q_xs.to(self.device), batch_size)
    else:
        x_changed = sample_batched_top_k(q_xs.to(self.device), batch_size, self.config.mcts.sampling)

    # carry-over: already-unmasked positions keep the parent token
    copy_flag = (token_array != self.mask_index).to(token_array.dtype)

    return p_x0, copy_flag * token_array + (1 - copy_flag) * x_changed
1181
+
1182
+ def _process_sigma(self, sigma):
1183
+ if sigma.ndim > 1:
1184
+ sigma = sigma.squeeze(-1)
1185
+ if not self.time_conditioning:
1186
+ sigma = torch.zeros_like(sigma)
1187
+ assert sigma.ndim == 1, sigma.shape
1188
+ return sigma
1189
+
1190
def forward(self, zt, attn_mask, sigma):
    """
    Predict token log-probabilities for zt at noise level sigma.

    Args:
        zt: (batch, seq_len) int tensor of token ids (may contain MASK tokens).
        attn_mask: attention mask matching zt.
        sigma: noise level(s); squeezed (and zeroed when time conditioning is
            off) by _process_sigma.

    Returns:
        (batch, seq_len, vocab_size) log-probabilities after SUBS
        parameterization.

    Raises:
        ValueError: if zt contains out-of-range token ids, or if the sequence
            length exceeds the backbone's max_position_embeddings.
    """
    sigma = self._process_sigma(sigma)

    # ====== INPUT VALIDATION (CPU-side) ======
    # Check 1: Token IDs must be in valid range [0, vocab_size - 1]
    zt_min = zt.min().item()
    zt_max = zt.max().item()
    if zt_min < 0 or zt_max >= self.vocab_size:
        raise ValueError(
            f"Invalid token IDs in zt: min={zt_min}, max={zt_max}, "
            f"vocab_size={self.vocab_size}. Token IDs must be in [0, {self.vocab_size-1}]"
        )

    # Check 2: Sequence length must not exceed model's max_position_embeddings
    seq_len = zt.shape[1]
    max_pos = getattr(self.backbone.model.config, 'max_position_embeddings', 512)
    if seq_len > max_pos:
        raise ValueError(
            f"Sequence length {seq_len} exceeds model's max_position_embeddings {max_pos}. "
            f"Input shape: {zt.shape}"
        )

    # NOTE(review): autocast with dtype=torch.float32 effectively disables mixed
    # precision, and torch.cuda.amp.autocast is deprecated in favor of
    # torch.amp.autocast('cuda', ...) — confirm intent before changing.
    with torch.cuda.amp.autocast(dtype=torch.float32):
        logits = self.backbone.forward(input_ids=zt, attn_mask=attn_mask).to(self.device)

    return self.subs_parameterization(logits, zt)
1219
+
1220
def subs_parameterization(self, logits, zt):
    """
    Apply the SUBS parameterization to raw logits:
      - zero masking probabilities: the MASK token gets -inf log-probability,
        so reverse diffusion never predicts MASK;
      - carry-over unmasking: positions already unmasked in zt keep their token
        with probability 1 (log-prob 0, -inf for every other token).

    Args:
        logits: (batch, seq_len, vocab_size) raw logits; modified in place and
            returned as normalized log-probabilities.
        zt: (batch, seq_len) partially unmasked sequence at the current step.

    Returns:
        (batch, seq_len, vocab_size) log-probabilities on self.device.
    """
    # forbid predicting the MASK token
    logits[:, :, self.mask_index] += self.neg_infinity

    # normalize to log-probabilities
    logits = (logits - torch.logsumexp(logits, dim=-1, keepdim=True)).to(self.device)

    # positions whose token is already known
    unmasked_indices = (zt != self.mask_index).to(self.device)

    # carry-over: everything at an unmasked position becomes -inf ...
    logits[unmasked_indices] = self.neg_infinity

    # Clip token indices to the valid vocab range to prevent index-out-of-bounds
    # (can happen with variable-length sequences or corrupted tokens).
    # NOTE: the previously computed-but-unused batch_idx/seq_idx/tokens locals
    # were removed; behavior is unchanged.
    tokens_for_indexing = zt[unmasked_indices]
    valid_token_mask = tokens_for_indexing < logits.shape[-1]

    if not valid_token_mask.all():
        import logging
        logger = logging.getLogger(__name__)
        invalid_count = (~valid_token_mask).sum().item()
        max_invalid_token = tokens_for_indexing[~valid_token_mask].max().item()
        logger.warning(f"Found {invalid_count} invalid token indices (max={max_invalid_token}, vocab_size={logits.shape[-1]}). Clipping to valid range.")
        tokens_for_indexing = torch.clamp(tokens_for_indexing, 0, logits.shape[-1] - 1)

    # ... except the observed token itself, which gets log-prob 0
    logits[unmasked_indices, tokens_for_indexing] = 0
    # return logits with SUBS parameterization
    return logits.to(self.device)
1271
+
1272
+ """SAMPLING"""
1273
@torch.no_grad()
def _sample(self, num_steps=None, eps=1e-5, x_input=None):
    """
    Generate samples by iterative reverse diffusion.

    Args:
        num_steps: number of reverse steps; defaults to config.sampling.steps.
        eps: smallest timestep (keeps t away from exactly 0).
        x_input: optional dict with 'input_ids'/'attention_mask' to continue
            denoising from a partially masked input instead of the full prior.

    Returns:
        (batch, seq_len) tensor of sampled token ids on self.device.
    """
    batch_size_per_gpu = self.config.eval.perplexity_batch_size

    if num_steps is None:
        num_steps = self.config.sampling.steps

    if x_input is not None:
        x = x_input['input_ids'].to(self.device)
        attn_mask = x_input['attention_mask'].to(self.device)
    else:
        # start from the fully masked prior
        x = self.sample_prior(batch_size_per_gpu, self.config.model.length).to(self.device)
        attn_mask = torch.ones_like(x).to(self.device)

    # decreasing timesteps from 1 down to eps
    timesteps = torch.linspace(1, eps, num_steps+1, device=self.device)
    dt = (1 - eps) / num_steps
    p_x0_cache = None
    generation_history = []  # used to track which tokens are unmasked

    for i in range(num_steps):
        t = timesteps[i] * torch.ones(x.shape[0], 1, device = self.device)
        if self.sampler == 'ddpm':
            # NOTE(review): single_reverse_step is commented out in this file —
            # confirm it is defined elsewhere before using this sampler.
            x = self.single_reverse_step(x, t, dt).to(self.device)

        elif self.sampler == 'ddpm_cache':
            p_x0_cache, x_next = self.cached_reverse_step(x, t, dt, p_x0=p_x0_cache, attn_mask=attn_mask)
            if (not torch.allclose(x_next, x) or self.time_conditioning):
                # sequence changed (or time conditioning is active): the cached
                # denoiser output is stale, so disable caching for the next step
                p_x0_cache = None
            x = x_next.to(self.device)
        else:
            x = self._analytic_update(x, t, dt, attn_mask).to(self.device)

    if self.config.sampling.noise_removal:
        # final step: force any remaining MASK tokens to resolve
        t = timesteps[-1] * torch.ones(x.shape[0], 1, device=self.device)
        if self.sampler == 'analytic':
            x = self._denoiser_update(x, t).to(self.device)
        else:
            time_conditioning = self.noise(t)[0].to(self.device)
            x = self.forward(x, attn_mask=attn_mask, sigma=time_conditioning).argmax(dim=-1).to(self.device)
    return x.to(self.device)
1320
+
1321
+
1322
def restore_model_and_sample(self, num_steps, eps=1e-5):
    """Switch to eval mode, draw samples, restore train mode, and return them."""
    self.backbone.eval()
    self.noise.eval()
    generated = self._sample(num_steps=num_steps, eps=eps)
    self.backbone.train()
    self.noise.train()
    return generated
1330
+
1331
def get_score(self, zt, sigma, attn_mask=None):
    """
    Compute probability-space score ratios p_t(y)/p_t(x) for the analytic sampler.

    Args:
        zt: (batch, seq_len) current token ids.
        sigma: noise level tensor.
        attn_mask: optional attention mask for the forward pass.

    Returns:
        (batch, seq_len, vocab_size) tensor of exp(log-score) values.
    """
    # score(x, t) = p_t(y) / p_t(x)
    # => log score(x, t) = log p_t(y) - log p_t(x)

    # case 1: x = masked
    # (i) y = unmasked
    # log score(x, t) = log p_\theta(x)|_y + log k
    # where k = exp(- sigma) / (1 - exp(- sigma))
    # (ii) y = masked
    # log score(x, t) = 0

    # case 2: x = unmasked
    # (i) y != masked, y != x
    # log score(x_i, t) = - inf
    # (ii) y = x
    # log score(x_i, t) = 0
    # (iii) y = masked token
    # log score(x_i, t) = - log k
    # where k = exp(- sigma) / (1 - exp(- sigma))

    model_output = self.forward(zt, attn_mask=attn_mask, sigma=sigma)

    # log k = -log(exp(sigma) - 1), i.e. k = exp(-sigma)/(1 - exp(-sigma))
    log_k = -torch.log(torch.expm1(sigma)).squeeze(-1)
    assert log_k.ndim == 1

    # case 1: masked positions — score p_theta(y) * k for real y, 1 for MASK
    masked_score = model_output + log_k[:, None, None]
    masked_score[:, :, self.mask_index] = 0

    # case 2: unmasked positions — only y = x (score 1) and y = MASK (score 1/k)
    unmasked_score = self.neg_infinity * torch.ones_like(model_output)
    unmasked_score = torch.scatter(
        unmasked_score, -1,
        zt[..., None],
        torch.zeros_like(unmasked_score[..., :1]))

    unmasked_score[:, :, self.mask_index] = - (log_k[:, None] * torch.ones_like(zt))

    # select the applicable case per position
    masked_indices = (zt == self.mask_index).to(model_output.dtype)[:, :, None]

    model_output = (masked_score * masked_indices + unmasked_score * (1 - masked_indices))

    return model_output.exp()
1373
+
1374
+ def _staggered_score(self, score, dsigma):
1375
+ score = score.clone()
1376
+ extra_const = (1 - dsigma.exp()) * score.sum(dim=-1)
1377
+ score *= dsigma.exp()[:, None]
1378
+ score[..., self.mask_index] += extra_const
1379
+ return score
1380
+
1381
def _analytic_update(self, x, t, step_size, attn_mask=None):
    """One analytic reverse-diffusion step from time ``t`` to ``t - step_size``.

    Args:
        x: Current token ids.
        t: Current diffusion time.
        step_size: Time decrement for this update.
        attn_mask: Optional attention mask forwarded to the score model.

    Returns:
        Token ids sampled from the analytic transition distribution.
    """
    curr_sigma, _ = self.noise(t)
    next_sigma, _ = self.noise(t - step_size)
    dsigma = curr_sigma - next_sigma
    # BUG FIX: was `self.get_score(x, attn_mask, curr_sigma)`, which passed the
    # attention mask into get_score's `sigma` parameter and the noise level
    # into `attn_mask`. get_score's signature is (zt, sigma, attn_mask=None).
    score = self.get_score(x, curr_sigma, attn_mask=attn_mask)
    stag_score = self._staggered_score(score, dsigma)
    probs = stag_score * self._transp_transition(x, dsigma)
    return _sample_categorical(probs)
1389
+
1390
def _denoiser_update(self, x, t):
    """Final denoising step: sample tokens while forbidding the mask token."""
    sigma, _ = self.noise(t)
    raw_score = self.get_score(x, sigma)
    adjusted = self._staggered_score(raw_score, sigma)
    transition_probs = adjusted * self._transp_transition(x, sigma)
    # Zero probability for re-emitting the mask token at the final step.
    transition_probs[..., self.mask_index] = 0
    return _sample_categorical(transition_probs)
1398
+
1399
def _transp_transition(self, i, sigma):
    """Transposed transition rows for tokens ``i`` at noise level ``sigma``.

    Each row is exp(-sigma) times the one-hot encoding of the token; for
    positions holding the mask token, the remaining 1 - exp(-sigma) mass is
    added uniformly across the vocabulary.
    """
    # Broadcast sigma up to the rank of i[..., None] so it lines up with the
    # one-hot encoding below.
    sigma = unsqueeze(sigma, reference=i[..., None])
    edge = torch.exp(-sigma) * F.one_hot(
        i, num_classes=self.vocab_size)
    edge += torch.where(i == self.mask_index,
                        1 - torch.exp(-sigma).squeeze(-1),
                        0)[..., None]
    return edge
1407
+
1408
+
1409
+ """TRAINING from https://github.com/Dao-AILab/flash-attention/blob/main/training/src/tasks/seq.py"""
1410
+
1411
def on_train_epoch_start(self):
    """Free cached GPU memory and put all trainable modules in train mode."""
    torch.cuda.empty_cache()
    for module in (self.backbone, self.noise):
        module.train()
1415
+
1416
+
1417
def training_step(self, batch, batch_idx):
    """Compute the training loss for one batch and log loss plus throughput.

    SMILES vocabularies carry a per-token bond mask that the loss needs;
    other vocabularies use the plain loss path.
    """
    tic = time.time()

    uses_bond_mask = self.config.vocab == 'old_smiles' or self.config.vocab == 'new_smiles'
    if uses_bond_mask:
        loss = self._compute_loss(batch, prefix='train', bond_mask=batch['bond_mask'])
    else:
        loss = self._compute_loss(batch, prefix='train')

    self.log(name='trainer/loss',
             value=loss.item(),
             on_step=True,
             on_epoch=False,
             sync_dist=True)

    # Tokens processed per wall-clock second for this step.
    total_tokens = batch['input_ids'].numel()
    self.log(name='trainer/throughput',
             value=total_tokens / (time.time() - tic),
             on_step=True,
             on_epoch=False,
             sync_dist=True)

    return loss
1444
+
1445
+
1446
def on_load_checkpoint(self, checkpoint):
    """Record completed epoch/batch counts so a resumed run can fast-forward.

    Reads Lightning's fit-loop progress counters out of the checkpoint dict.
    """
    fit_loop_state = checkpoint['loops']['fit_loop']
    self.fast_forward_epochs = fit_loop_state['epoch_progress']['current']['completed']
    self.fast_forward_batches = fit_loop_state['epoch_loop.batch_progress']['current']['completed']
1449
+
1450
+ ### VALIDATION ###
1451
def on_validation_epoch_start(self):
    """Reclaim memory, switch to eval mode, and verify the NLL metric was reset."""
    gc.collect()
    torch.cuda.empty_cache()
    for module in (self.backbone, self.noise):
        module.eval()
    # The validation NLL accumulator must start from a clean slate.
    assert self.valid_metrics.nll.mean_value == 0
    assert self.valid_metrics.nll.weight == 0
1458
+
1459
def validation_step(self, batch, batch_idx):
    """Compute and log the validation loss for one batch."""
    # SMILES vocabularies require the per-token bond mask in the loss.
    if self.config.vocab in ('old_smiles', 'new_smiles'):
        loss = self._compute_loss(batch, prefix='val', bond_mask=batch['bond_mask'])
    else:
        loss = self._compute_loss(batch, prefix='val')

    self.log(name='trainer/val_loss',
             value=loss.item(),
             on_step=True,
             on_epoch=False,
             prog_bar=True,
             sync_dist=True)
    return loss
1472
+
1473
def on_validation_epoch_end(self):
    """Release Python garbage and cached CUDA blocks after validation."""
    gc.collect()
    torch.cuda.empty_cache()
1476
+
1477
+ ### OPTIMIZATION ###
1478
+
1479
def optimizer_step(self, *args, **kwargs):
    """Run the standard optimizer step, then aggressively reclaim memory."""
    super().optimizer_step(*args, **kwargs)

    # Free Python garbage and cached CUDA blocks after every update to keep
    # peak memory down during long fine-tuning runs.
    gc.collect()
    torch.cuda.empty_cache()
1484
+
1485
def configure_optimizers(self):
    """Build the AdamW optimizer and a per-step cosine-warmup LR schedule.

    Returns the Lightning-style ([optimizers], [scheduler dicts]) pair.
    """
    optim_cfg = self.config.optim
    trainable_params = itertools.chain(self.backbone.parameters(), self.noise.parameters())
    optimizer = torch.optim.AdamW(
        trainable_params,
        lr=optim_cfg.lr,
        betas=(optim_cfg.beta1, optim_cfg.beta2),
        eps=optim_cfg.eps,
        weight_decay=optim_cfg.weight_decay
    )

    self.total_steps = self.config.trainer.max_steps
    scheduler = CosineWarmup(optimizer,
                             warmup_steps=self.config.lr_scheduler.num_warmup_steps,
                             total_steps=self.total_steps)

    return [optimizer], [{
        'scheduler': scheduler,
        'interval': 'step',
        'frequency': 1,
        'monitor': 'val/loss',
        'name': 'trainer/lr'
    }]
1508
+
1509
@torch.no_grad()
def compute_masked_perplexity(self, generated_ids, input_ids):
    """Compute pseudo-perplexity of generated sequences at masked positions.

    For every generated sequence, the backbone scores the (partially masked)
    ``input_ids``; cross-entropy is accumulated only where ``input_ids`` holds
    the mask token, using the generated ids as targets. Normalization divides
    by the count of non-padding tokens (BOS/EOS included).

    Args:
        generated_ids: Iterable of token-id sequences produced by sampling.
        input_ids: Masked template token ids shared by all sequences.

    Returns:
        The pseudo-perplexity as a Python float. Also updates
        ``self.gen_ppl_metric``.
    """
    total_nll = 0.0
    total_tokens = 0

    input_ids = torch.tensor(input_ids).to(self.device)
    # The attention mask is identical for every sequence; hoist it out of the loop.
    attn_mask = torch.ones_like(input_ids).to(self.device)

    for sequence in generated_ids:
        gt_ids = torch.tensor(sequence).to(self.device)

        # FIX: the original branched on self.config.mode, but every branch ran
        # this identical forward call — and any other mode left `outputs`
        # unbound (NameError). A single unconditional call is equivalent.
        outputs = self.backbone.forward(input_ids=input_ids, attn_mask=attn_mask)

        # Flatten to (batch * seq_len, vocab_size) logits vs flat targets.
        logits = outputs.view(-1, outputs.size(-1))
        gt_ids = gt_ids.view(-1)

        # Only masked positions contribute; everywhere else gets the
        # cross_entropy ignore_index of -100.
        loss = F.cross_entropy(
            logits,
            gt_ids.where(input_ids == self.mask_index, torch.full_like(gt_ids, -100)).view(-1),
            reduction='sum')

        total_nll += loss.item()
        # Count all non-padding tokens (BOS/EOS included).
        total_tokens += input_ids.ne(self.tokenizer.pad_token_id).sum().item()

    pseudo_perplexity = torch.exp(torch.tensor(total_nll / total_tokens))
    self.gen_ppl_metric.update(pseudo_perplexity)

    return pseudo_perplexity.item()
1568
+
1569
+
1570
def unsqueeze(x, reference):
    """Append trailing singleton dims to ``x`` until it matches ``reference``'s rank."""
    trailing = [1] * (len(reference.shape) - len(x.shape))
    return x.view(*x.shape, *trailing)
1572
+
1573
class CosineWarmup(_LRScheduler):
    """Linear warmup followed by cosine decay toward ``eta_ratio * base_lr``."""

    def __init__(self, optimizer, warmup_steps, total_steps, eta_ratio=0.1, last_epoch=-1):
        self.warmup_steps = warmup_steps
        self.total_steps = total_steps
        # Ratio of the minimum (fully decayed) LR to the maximum LR.
        self.eta_ratio = eta_ratio
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        step = self.last_epoch
        # Linear ramp from 0 to base_lr over the warmup window.
        if step < self.warmup_steps:
            return [lr * step / self.warmup_steps for lr in self.base_lrs]

        # Cosine anneal from base_lr down to eta_ratio * base_lr.
        progress = (step - self.warmup_steps) / (self.total_steps - self.warmup_steps)
        cosine_term = 0.5 * (1 + np.cos(np.pi * progress))
        scale = (1 - self.eta_ratio) * cosine_term + self.eta_ratio
        return [scale * lr for lr in self.base_lrs]
distributed_utils.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Minimal distributed training utilities."""
2
+ import os
3
+ import torch
4
+ import torch.distributed as dist
5
+
6
+
7
def setup_distributed(rank: int, world_size: int, backend: str = "nccl") -> None:
    """Initialize the distributed process group; no-op for single-process runs."""
    if world_size <= 1:
        return
    # Respect variables already set by a launcher; fall back to single-node defaults.
    for var, default in (("MASTER_ADDR", "localhost"), ("MASTER_PORT", "29500")):
        os.environ.setdefault(var, default)
    dist.init_process_group(backend=backend, rank=rank, world_size=world_size)
    if torch.cuda.is_available():
        torch.cuda.set_device(rank)
16
+
17
+
18
def cleanup_distributed() -> None:
    """Tear down the distributed process group if one is active."""
    if not dist.is_initialized():
        return
    dist.destroy_process_group()
22
+
23
+
24
def is_main_process() -> bool:
    """Return True on rank 0, or always when not running distributed."""
    return (not dist.is_initialized()) or dist.get_rank() == 0
env.yml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: td3b
2
+ channels:
3
+ - pytorch
4
+ - nvidia
5
+ - conda-forge
6
+ dependencies:
7
+ - python=3.10
8
+ - pip
9
+ - pytorch
10
+ - torchvision
11
+ - pytorch-cuda=12.1
12
+ - rdkit
13
+ - numpy
14
+ - pandas
15
+ - scikit-learn
16
+ - jupyterlab
17
+ - matplotlib-base
18
+ - seaborn
19
+ - tqdm
20
+ - pyyaml
21
+ - pip:
22
+ - pytorch-lightning==2.5.5
23
+ - lightning==2.5.5
24
+ - fair-esm==2.0.0
25
+ - transformers==4.56.2
26
+ - SmilesPE==0.0.3
27
+ - scipy==1.13.1
28
+ - wandb==0.22.0
29
+ - hydra-core==1.3.2
30
+ - hydra-submitit-launcher==1.2.0
31
+ - pathos==0.3.4
32
+ - matplotlib==3.10.1
33
+ - pandas==2.2.2
34
+ - seaborn==0.13.2
35
+ - timm==1.0.20
36
+ - xgboost==3.0.5
37
+ - loguru==0.7.3
finetune_multi_target.py ADDED
@@ -0,0 +1,1061 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Multi-Target TD3B Fine-Tuning Script
3
+
4
+ Trains TD3B on multiple protein targets with random sampling strategy.
5
+ Uses the GPCR directional oracle for direction-aware gating.
6
+
7
+ Architecture: Transition-Directed Discrete Diffusion for Binders (TD3B)
8
+ Training: Random K-target sampling + MCTS-guided trajectory optimization + contrastive learning
9
+
10
+ Key Features:
11
+ - Random K targets sampled per MCTS round
12
+ - Small-batch training to prevent OOM
13
+ - Periodic validation on held-out targets
14
+ - Checkpoint saving with validation metrics
15
+ """
16
+
17
+ import os
18
+ import sys
19
+ import argparse
20
+ import logging
21
+ import warnings
22
+ from typing import List, Tuple, Dict, Optional
23
+ from dataclasses import dataclass
24
+ from pathlib import Path
25
+
26
+ import torch
27
+ import torch.nn as nn
28
+ import numpy as np
29
+ import pandas as pd
30
+ import wandb
31
+ from tqdm import tqdm
32
+
33
+ # Add project root to path
34
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
35
+
36
+ from diffusion import Diffusion
37
+ from tokenizer.my_tokenizers import SMILES_SPE_Tokenizer
38
+ from utils.app import PeptideAnalyzer
39
+ from scoring.functions.binding import MultiTargetBindingAffinity, TargetSpecificBindingAffinity
40
+ from td3b.data_utils import peptide_seq_to_smiles, smiles_token_length
41
+
42
+ # TD3B imports
43
+ from td3b.td3b_losses import TD3BTotalLoss
44
+ from td3b.td3b_finetune import (
45
+ extract_embeddings_from_mdlm,
46
+ add_td3b_sampling_to_model
47
+ )
48
+ from td3b.direction_oracle import DirectionalOracle
49
+
50
+ # Import shared configuration classes
51
+ from configs.finetune_config import (
52
+ RoFormerConfig,
53
+ NoiseConfig,
54
+ TrainingConfig,
55
+ SamplingConfig,
56
+ EvalConfig,
57
+ OptimConfig,
58
+ MCTSConfig,
59
+ DiffusionConfig
60
+ )
61
+
62
+ # Import shared utilities
63
+ from finetune_utils import (
64
+ load_tokenizer,
65
+ initialize_device,
66
+ create_output_directory,
67
+ save_model,
68
+ setup_wandb,
69
+ cleanup_wandb,
70
+ create_mcts_instance,
71
+ create_reward_function,
72
+ )
73
+
74
+ # Configure logging
75
+ logging.basicConfig(
76
+ level=logging.INFO,
77
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
78
+ )
79
+ logger = logging.getLogger(__name__)
80
+
81
+ # Suppress warnings
82
+ warnings.filterwarnings('ignore', category=FutureWarning)
83
+ warnings.filterwarnings('ignore', category=UserWarning)
84
+
85
+ # Constants
86
# Visual separator used to delimit sections in log output.
SEPARATOR_LINE = "=" * 80
# Small numerical constant (e.g. sampling epsilon lower bound).
eps = 1e-5
88
+
89
class TargetDataset:
    """Dataset handler for multi-target training.

    Groups a (target, ligand, label) CSV by target protein and precomputes,
    per target, the median binder token length for each direction so that
    generation can match the lengths seen in the data.
    """

    def __init__(self, csv_path: str, tokenizer: Optional[SMILES_SPE_Tokenizer] = None):
        """
        Load target dataset from CSV.

        Args:
            csv_path: Path to CSV file with columns:
                - Target_Sequence: Protein target sequence
                - Ligand_Sequence: Binder sequence (for length reference)
                - label: 'agonist' or 'antagonist'
            tokenizer: Tokenizer used to compute SMILES token length
        """
        self.df = pd.read_csv(csv_path)
        logger.info(f"Loaded {len(self.df)} samples from {csv_path}")
        self.tokenizer = tokenizer

        # Group by target: one summary dict per unique protein sequence.
        self.targets = {}
        for target_seq in self.df['Target_Sequence'].unique():
            target_df = self.df[self.df['Target_Sequence'] == target_seq]

            # Get binder lengths for each direction
            agonist_binders = target_df[target_df['label'] == 'agonist']['Ligand_Sequence'].tolist()
            antagonist_binders = target_df[target_df['label'] == 'antagonist']['Ligand_Sequence'].tolist()

            # Store actual sequence lengths (tokenized SMILES lengths).
            agonist_lengths = [self._binder_length(seq) for seq in agonist_binders] if agonist_binders else []
            antagonist_lengths = [self._binder_length(seq) for seq in antagonist_binders] if antagonist_binders else []

            # Use the median length for each direction so generation mirrors
            # the lengths of the provided binders.
            if agonist_lengths:
                agonist_target_length = int(np.median(agonist_lengths))
            else:
                # Default to antagonist length if no agonist, or 50 if neither
                agonist_target_length = int(np.median(antagonist_lengths)) if antagonist_lengths else 50

            if antagonist_lengths:
                antagonist_target_length = int(np.median(antagonist_lengths))
            else:
                # Default to agonist length if no antagonist, or 50 if neither
                antagonist_target_length = int(np.median(agonist_lengths)) if agonist_lengths else 50

            self.targets[target_seq] = {
                'sequence': target_seq,
                'agonist_length': agonist_target_length,  # Target length for agonist generation
                'antagonist_length': antagonist_target_length,  # Target length for antagonist generation
                'agonist_count': len(agonist_binders),
                'antagonist_count': len(antagonist_binders)
            }

        logger.info(f"Found {len(self.targets)} unique targets")

    def _binder_length(self, binder_seq: str) -> int:
        """Length of the binder in SMILES tokens (character length if no tokenizer)."""
        smiles = peptide_seq_to_smiles(binder_seq)
        if self.tokenizer is None:
            return len(smiles)
        return smiles_token_length(smiles, self.tokenizer)

    def sample_targets(self, k: int, random_state: Optional[int] = None) -> List[str]:
        """
        Randomly sample K targets (without replacement).

        Args:
            k: Number of targets to sample (clamped to the number available)
            random_state: Random seed for reproducibility
                NOTE(review): seeds NumPy's global RNG as a side effect.

        Returns:
            List of target sequences
        """
        if random_state is not None:
            np.random.seed(random_state)

        target_seqs = list(self.targets.keys())
        k = min(k, len(target_seqs))
        return np.random.choice(target_seqs, size=k, replace=False).tolist()

    def get_target_info(self, target_seq: str) -> Dict:
        """Get information for a specific target."""
        return self.targets[target_seq]

    def get_sequence_length(self, target_seq: str, direction: str) -> int:
        """
        Get the target sequence length for generation.

        Args:
            target_seq: Target protein sequence
            direction: 'agonist' (also accepts 1.0 or '+1'); anything else
                is treated as antagonist
        Returns:
            Target binder sequence length
        """
        target_info = self.targets[target_seq]
        if direction == 'agonist' or direction == 1.0 or direction == '+1':
            return target_info['agonist_length']
        else:  # antagonist
            return target_info['antagonist_length']

    def get_all_targets(self) -> List[str]:
        """Get all target sequences."""
        return list(self.targets.keys())
192
+
193
+
194
def run_validation(
    policy_model: Diffusion,
    multi_target_affinity: MultiTargetBindingAffinity,
    directional_oracle: DirectionalOracle,
    tokenizer: SMILES_SPE_Tokenizer,
    val_dataset: TargetDataset,
    args: argparse.Namespace,
    epoch: int,
    device: torch.device,
    protein_token_cache: Optional[Dict[str, torch.Tensor]] = None
) -> Dict:
    """
    Run validation on all targets in the validation dataset.

    For every target, generates binders in both directions (agonist d*=+1,
    antagonist d*=-1), scores them with the binding-affinity gate and the
    direction oracle, aggregates the metrics, and writes per-sequence
    results to a CSV under args.save_path.

    Args:
        policy_model: Trained diffusion model
        multi_target_affinity: Multi-target binding affinity predictor
        directional_oracle: Directional oracle
        tokenizer: Tokenizer
        val_dataset: Validation dataset
        args: Training arguments (args.seq_length is temporarily mutated per
            target/direction and restored afterwards)
        epoch: Current epoch
        device: Device
        protein_token_cache: Optional cache of encoded target proteins;
            mutated in place as new targets are encoded

    Returns:
        Dictionary with validation metrics
    """
    logger.info(f"\n{SEPARATOR_LINE}")
    logger.info(f"Running validation at epoch {epoch}")
    logger.info(f"{SEPARATOR_LINE}")

    policy_model.eval()

    # Per-sequence accumulators; all stay aligned at length == total valid sequences.
    all_sequences = []
    all_affinities = []
    all_gated_rewards = []
    all_directions = []
    all_target_directions = []  # d* for each sequence
    all_valid_fractions = []
    all_valid_fractions_per_sample = []
    all_target_names = []

    val_targets = val_dataset.get_all_targets()

    if protein_token_cache is None:
        protein_token_cache = {}

    with torch.no_grad():
        for target_seq in tqdm(val_targets, desc="Validating targets"):
            target_info = val_dataset.get_target_info(target_seq)
            # Encode the target protein once and cache it across epochs.
            target_protein_tokens = protein_token_cache.get(target_seq)
            if target_protein_tokens is None:
                target_protein_tokens = directional_oracle.encode_protein(target_seq)
                protein_token_cache[target_seq] = target_protein_tokens

            # Generate for both agonist and antagonist
            for direction_name, d_star in [('agonist', 1.0), ('antagonist', -1.0)]:
                # Get the target sequence length for this direction
                target_length = val_dataset.get_sequence_length(target_seq, direction_name)

                # Temporarily set args.seq_length for this generation
                original_seq_length = args.seq_length
                args.seq_length = target_length

                # Create target-specific affinity predictor for this target
                target_affinity = TargetSpecificBindingAffinity(multi_target_affinity, target_seq)

                # Create reward model for this target+direction
                reward_model = create_reward_function(
                    affinity_predictor=target_affinity,
                    directional_oracle=directional_oracle,
                    target_direction=d_star,
                    target_protein_tokens=target_protein_tokens,
                    tokenizer=tokenizer,
                    device=device,
                    min_affinity_threshold=args.min_affinity_threshold,
                    use_confidence_weighting=True,
                    temperature=args.sigmoid_temperature
                )

                # Sample sequences with the correct length
                x_eval, eval_metrics = policy_model.sample_finetuned_td3b(
                    args,
                    reward_model,
                    batch_size=args.val_samples_per_target,
                    dataframe=False
                )

                # Restore original seq_length
                args.seq_length = original_seq_length

                # Decode sequences
                sequences = tokenizer.batch_decode(x_eval)

                # Get metrics
                affinities = eval_metrics.get('affinity', [])
                gated_rewards = eval_metrics.get('gated_reward', [])
                directions = eval_metrics.get('direction_predictions', [])
                valid_fraction = eval_metrics.get('valid_fraction', 0.0)

                # CRITICAL FIX: Metrics are only computed for valid sequences
                # So we should extend based on the length of metrics arrays, not all sequences
                num_valid = len(affinities)  # Number of valid sequences with metrics

                # Filter to only valid sequences (metrics are only for valid ones)
                from utils.app import PeptideAnalyzer
                analyzer = PeptideAnalyzer()
                valid_sequences = [seq for seq in sequences if analyzer.is_peptide(seq)][:num_valid]

                # Store (all arrays must have the same length = num_valid)
                all_sequences.extend(valid_sequences)  # Only valid sequences
                all_affinities.extend(affinities)
                all_gated_rewards.extend(gated_rewards)
                all_directions.extend(directions)
                all_target_directions.extend([d_star] * num_valid)
                all_valid_fractions.append(valid_fraction)
                all_valid_fractions_per_sample.extend([valid_fraction] * num_valid)
                all_target_names.extend([target_seq[:20]] * num_valid)

    # Compute validation metrics
    all_affinities = np.array(all_affinities)
    all_gated_rewards = np.array(all_gated_rewards)
    all_directions = np.array(all_directions)
    all_target_directions = np.array(all_target_directions)

    # Direction is "correct" when the oracle output agrees with the sign of d*.
    if all_directions.size == 0:
        direction_correct = np.array([], dtype=np.float32)
    else:
        direction_correct = np.where(
            all_target_directions > 0,
            all_directions >= 0.5,
            all_directions < 0.5
        ).astype(np.float32)

    # Consistency rewards: d* × (f_φ - 0.5)
    consistency_rewards = all_target_directions * (all_directions - 0.5)  # range from -1 to 1.
    success_rates = direction_correct * np.array(all_valid_fractions_per_sample, dtype=np.float32)

    # Separate by direction
    agonist_mask = all_target_directions == 1.0
    antagonist_mask = all_target_directions == -1.0

    consistency_agonist = consistency_rewards[agonist_mask]
    consistency_antagonist = consistency_rewards[antagonist_mask]

    val_metrics = {
        'affinity_mean': np.mean(all_affinities),
        'affinity_std': np.std(all_affinities),
        'gated_reward_mean': np.mean(all_gated_rewards),
        'gated_reward_std': np.std(all_gated_rewards),
        'direction_oracle_mean': np.mean(all_directions),
        'direction_oracle_std': np.std(all_directions),
        'consistency_reward_mean': np.mean(consistency_rewards),
        'consistency_reward_std': np.std(consistency_rewards),
        'consistency_agonist_mean': np.mean(consistency_agonist) if len(consistency_agonist) > 0 else 0.0,
        'consistency_agonist_std': np.std(consistency_agonist) if len(consistency_agonist) > 0 else 0.0,
        'consistency_antagonist_mean': np.mean(consistency_antagonist) if len(consistency_antagonist) > 0 else 0.0,
        'consistency_antagonist_std': np.std(consistency_antagonist) if len(consistency_antagonist) > 0 else 0.0,
        'valid_fraction_mean': np.mean(all_valid_fractions),
        'valid_fraction_std': np.std(all_valid_fractions),
        'direction_accuracy_mean': np.mean(direction_correct) if direction_correct.size else 0.0,
        'direction_accuracy_std': np.std(direction_correct) if direction_correct.size else 0.0,
        'success_rate_mean': np.mean(success_rates) if success_rates.size else 0.0,
        'success_rate_std': np.std(success_rates) if success_rates.size else 0.0
    }

    # Log validation metrics
    logger.info(f"\nValidation Results (Epoch {epoch}):")
    logger.info(f"  Affinity: {val_metrics['affinity_mean']:.4f} ± {val_metrics['affinity_std']:.4f}")
    logger.info(f"  Gated Reward: {val_metrics['gated_reward_mean']:.4f} ± {val_metrics['gated_reward_std']:.4f}")
    logger.info(f"  Direction Oracle: {val_metrics['direction_oracle_mean']:.4f} ± {val_metrics['direction_oracle_std']:.4f}")
    logger.info(f"  Consistency Reward: {val_metrics['consistency_reward_mean']:.4f} ± {val_metrics['consistency_reward_std']:.4f}")
    logger.info(f"  Consistency (d*=+1): {val_metrics['consistency_agonist_mean']:.4f} ± {val_metrics['consistency_agonist_std']:.4f}")
    logger.info(f"  Consistency (d*=-1): {val_metrics['consistency_antagonist_mean']:.4f} ± {val_metrics['consistency_antagonist_std']:.4f}")
    logger.info(f"  Valid Fraction: {val_metrics['valid_fraction_mean']:.4f} ± {val_metrics['valid_fraction_std']:.4f}")
    logger.info(f"  Direction Accuracy: {val_metrics['direction_accuracy_mean']:.4f} ± {val_metrics['direction_accuracy_std']:.4f}")
    logger.info(f"  Success Rate: {val_metrics['success_rate_mean']:.4f} ± {val_metrics['success_rate_std']:.4f}")

    # Save validation sequences to file
    val_df = pd.DataFrame({
        'target': all_target_names,
        'sequence': all_sequences,
        'target_direction': all_target_directions,
        'affinity': all_affinities,
        'gated_reward': all_gated_rewards,
        'direction_oracle': all_directions,
        'consistency_reward': consistency_rewards,
        'direction_accuracy': direction_correct,
        'success_rate': success_rates
    })

    val_output_path = os.path.join(args.save_path, f'validation_epoch_{epoch}.csv')
    val_df.to_csv(val_output_path, index=False)
    logger.info(f"Validation sequences saved to {val_output_path}")

    policy_model.train()

    return val_metrics
392
+
393
+
394
+ def parse_args():
395
+ """Parse command-line arguments."""
396
+ parser = argparse.ArgumentParser(description='Multi-Target TD3B Fine-Tuning')
397
+
398
+ # Paths
399
+ path_group = parser.add_argument_group('Paths')
400
+ path_group.add_argument('--base_path', type=str, required=True,
401
+ help='Base path for TR2-D2 project')
402
+ path_group.add_argument('--train_csv', type=str, required=True,
403
+ help='Path to training CSV file')
404
+ path_group.add_argument('--val_csv', type=str, default=None,
405
+ help='Path to validation CSV file (optional)')
406
+ path_group.add_argument('--pretrained_checkpoint', type=str, required=True,
407
+ help='Path to pretrained diffusion model checkpoint')
408
+ path_group.add_argument('--run_name', type=str, required=True,
409
+ help='Name for this training run')
410
+ path_group.add_argument('--device', type=str, default='cuda',
411
+ help='Device to use (cuda or cpu)')
412
+
413
+ # Multi-target sampling
414
+ target_group = parser.add_argument_group('Multi-Target Sampling')
415
+ target_group.add_argument('--targets_per_mcts', type=int, default=5,
416
+ help='Number of targets to sample per MCTS round (K)')
417
+ target_group.add_argument('--resample_targets_every', type=int, default=1,
418
+ help='Resample targets every N epochs')
419
+
420
+ # Training hyperparameters
421
+ train_group = parser.add_argument_group('Training')
422
+ train_group.add_argument('--num_epochs', type=int, default=200,
423
+ help='Total number of training epochs')
424
+ train_group.add_argument('--learning_rate', type=float, default=3e-4,
425
+ help='Learning rate for optimizer')
426
+ train_group.add_argument('--train_batch_size', type=int, default=16,
427
+ help='Batch size for training (small to prevent OOM)')
428
+ train_group.add_argument('--gradient_accumulation_steps', type=int, default=4,
429
+ help='Accumulate gradients over N steps')
430
+ train_group.add_argument('--resample_every_n_step', type=int, default=10,
431
+ help='Resample MCTS every N epochs')
432
+ train_group.add_argument('--save_every_n_epochs', type=int, default=20,
433
+ help='Save checkpoint every N epochs')
434
+ train_group.add_argument('--validate_every_n_epochs', type=int, default=20,
435
+ help='Run validation every N epochs')
436
+ train_group.add_argument('--num_epoch_for_sampling', type=int, default=5,
437
+ help='Run evaluation sampling every N epochs (set <=0 to disable)')
438
+ train_group.add_argument('--reset_every_n_step', type=int, default=50,
439
+ help='Reset MCTS tree every N epochs')
440
+
441
+ # MCTS hyperparameters
442
+ mcts_group = parser.add_argument_group('MCTS')
443
+ mcts_group.add_argument('--num_iter', type=int, default=50,
444
+ help='MCTS iterations per resample (v1 default: 50, reduce for multi-target)')
445
+ mcts_group.add_argument('--num_children', type=int, default=30,
446
+ help='Children per MCTS expansion')
447
+ mcts_group.add_argument('--buffer_size', type=int, default=50,
448
+ help='Pareto buffer size (v1 default: 50)')
449
+ mcts_group.add_argument('--replay_buffer_size', type=int, default=0,
450
+ help='Max replay buffer size across resamples (0 disables replay)')
451
+ mcts_group.add_argument('--replay_buffer_strategy', type=str, default='fifo',
452
+ choices=['fifo', 'random'],
453
+ help='Replay buffer eviction strategy when full')
454
+ mcts_group.add_argument('--alpha', type=float, default=0.1,
455
+ help='Temperature for importance weighting')
456
+ mcts_group.add_argument('--exploration', type=float, default=1.0,
457
+ help='UCB exploration constant')
458
+
459
+ # TD3B loss hyperparameters
460
+ loss_group = parser.add_argument_group('TD3B Loss')
461
+ loss_group.add_argument('--contrastive_weight', type=float, default=0.1,
462
+ help='Weight for contrastive loss (v1 default: 0.1)')
463
+ loss_group.add_argument('--contrastive_margin', type=float, default=1.0,
464
+ help='Margin for contrastive loss')
465
+ loss_group.add_argument('--contrastive_type', type=str, default='triplet',
466
+ choices=['triplet', 'ntxent', 'supcon'],
467
+ help='Type of contrastive loss')
468
+ loss_group.add_argument('--kl_beta', type=float, default=0.1,
469
+ help='KL divergence regularization coefficient (v1 default: 0.1)')
470
+ loss_group.add_argument('--min_affinity_threshold', type=float, default=0.0,
471
+ help='Minimum affinity threshold for allosteric control (CRITICAL)')
472
+ loss_group.add_argument('--sigmoid_temperature', type=float, default=0.1,
473
+ help='Temperature for sigmoid gating')
474
+
475
+ # Validation
476
+ val_group = parser.add_argument_group('Validation')
477
+ val_group.add_argument('--val_samples_per_target', type=int, default=20,
478
+ help='Number of sequences to generate per target during validation')
479
+
480
+ # Architecture
481
+ arch_group = parser.add_argument_group('Architecture')
482
+ arch_group.add_argument('--seq_length', type=int, default=200,
483
+ help='Maximum sequence length')
484
+ arch_group.add_argument('--embedding_pool_method', type=str, default='cls',
485
+ choices=['cls', 'mean', 'max'],
486
+ help='Pooling method for embeddings')
487
+ arch_group.add_argument('--hidden_dim', type=int, default=768,
488
+ help='Hidden dimension size')
489
+ arch_group.add_argument('--num_layers', type=int, default=8,
490
+ help='Number of transformer layers (v1 default: 8)')
491
+ arch_group.add_argument('--num_heads', type=int, default=8,
492
+ help='Number of attention heads (v1 default: 8)')
493
+ arch_group.add_argument('--sampling_eps', type=float, default=1e-3,
494
+ help='Sampling epsilon (v1 default: 1e-3)')
495
+ arch_group.add_argument('--total_num_steps', type=int, default=128,
496
+ help='Total number of diffusion steps (v1 default: 128)')
497
+
498
+ # Optimization
499
+ opt_group = parser.add_argument_group('Optimization')
500
+ opt_group.add_argument('--grad_clip', action='store_true',
501
+ help='Enable gradient clipping')
502
+ opt_group.add_argument('--gradnorm_clip', type=float, default=1.0,
503
+ help='Gradient norm clipping threshold')
504
+ opt_group.add_argument('--wdce_num_replicates', type=int, default=16,
505
+ help='Number of replicates for WDCE loss (v1 default: 16)')
506
+ opt_group.add_argument('--centering', action='store_true',
507
+ help='Enable centering in WDCE loss')
508
+
509
+ # Logging
510
+ log_group = parser.add_argument_group('Logging')
511
+ log_group.add_argument('--wandb_project', type=str, default='TD3B-multi-target',
512
+ help='W&B project name')
513
+ log_group.add_argument('--wandb_entity', type=str, default='phos_zj',
514
+ help='W&B entity name')
515
+
516
+ # Directional oracle
517
+ oracle_group = parser.add_argument_group('Directional Oracle')
518
+ oracle_group.add_argument('--direction_oracle_ckpt', type=str, default=None,
519
+ help='Path to directional oracle checkpoint')
520
+ oracle_group.add_argument('--direction_oracle_tr2d2_checkpoint', type=str, default=None,
521
+ help='Path to TR2D2 checkpoint used by the oracle')
522
+ oracle_group.add_argument('--direction_oracle_tokenizer_vocab', type=str, default=None,
523
+ help='Path to SMILES tokenizer vocab for oracle')
524
+ oracle_group.add_argument('--direction_oracle_tokenizer_splits', type=str, default=None,
525
+ help='Path to SMILES tokenizer splits for oracle')
526
+ oracle_group.add_argument('--direction_oracle_esm_name', type=str,
527
+ default='facebook/esm2_t33_650M_UR50D',
528
+ help='ESM model name or local path')
529
+ oracle_group.add_argument('--direction_oracle_esm_cache_dir', type=str, default=None,
530
+ help='Optional cache directory for ESM model')
531
+ oracle_group.add_argument('--direction_oracle_esm_local_files_only', action='store_true',
532
+ help='Load ESM from local cache only (no network)')
533
+ oracle_group.add_argument('--direction_oracle_max_ligand_length', type=int, default=768,
534
+ help='Max SMILES token length for oracle')
535
+ oracle_group.add_argument('--direction_oracle_max_protein_length', type=int, default=1024,
536
+ help='Max protein token length for oracle')
537
+ oracle_group.add_argument('--direction_oracle_d_model', type=int, default=256,
538
+ help='Oracle hidden dimension (must match checkpoint)')
539
+ oracle_group.add_argument('--direction_oracle_n_heads', type=int, default=4,
540
+ help='Oracle attention heads (must match checkpoint)')
541
+ oracle_group.add_argument('--direction_oracle_n_self_attn_layers', type=int, default=1,
542
+ help='Oracle self-attention layers (must match checkpoint)')
543
+ oracle_group.add_argument('--direction_oracle_n_bmca_layers', type=int, default=2,
544
+ help='Oracle cross-attention layers (must match checkpoint)')
545
+ oracle_group.add_argument('--direction_oracle_dropout', type=float, default=0.3,
546
+ help='Oracle dropout (must match checkpoint)')
547
+
548
+ args = parser.parse_args()
549
+
550
+ # Resolve default oracle paths relative to base_path
551
+ base_tr2d2_path = os.path.join(args.base_path, 'tr2d2-pep')
552
+ if args.direction_oracle_ckpt is None:
553
+ args.direction_oracle_ckpt = os.path.join(
554
+ base_tr2d2_path, 'best_model_tr2d2_gpcr_fixed.pt'
555
+ )
556
+ if args.direction_oracle_tr2d2_checkpoint is None:
557
+ args.direction_oracle_tr2d2_checkpoint = os.path.join(
558
+ base_tr2d2_path, 'pretrained', 'peptune-pretrained.ckpt'
559
+ )
560
+ if args.direction_oracle_tokenizer_vocab is None:
561
+ args.direction_oracle_tokenizer_vocab = os.path.join(
562
+ base_tr2d2_path, 'tokenizer', 'new_vocab.txt'
563
+ )
564
+ if args.direction_oracle_tokenizer_splits is None:
565
+ args.direction_oracle_tokenizer_splits = os.path.join(
566
+ base_tr2d2_path, 'tokenizer', 'new_splits.txt'
567
+ )
568
+
569
+ # Add derived attributes (required by MCTS)
570
+ args.time_conditioning = False
571
+ args.num_obj = 5 # Must match padded score vector size
572
+ args.scalarization = "sum"
573
+
574
+ # Create save path
575
+ args.save_path = create_output_directory(
576
+ args.base_path,
577
+ args.run_name,
578
+ add_timestamp=True
579
+ )
580
+
581
+ return args
582
+
583
+
584
def main():
    """Entry point: multi-target TD3B fine-tuning.

    Pipeline:
      1. Parse args, set up device/W&B/tokenizer and load datasets.
      2. Build policy + frozen reference diffusion models from a pretrained checkpoint.
      3. Build the multi-target affinity predictor and directional oracle rewards.
      4. Alternate MCTS sampling (fills a sequence buffer) with mini-batch
         training on WDCE + KL + optional contrastive losses.
      5. Periodically validate, checkpoint, and save a final model.
    """
    args = parse_args()

    logger.info(f"\n{SEPARATOR_LINE}")
    logger.info("Multi-Target TD3B Fine-Tuning")
    logger.info(f"{SEPARATOR_LINE}\n")

    # Set device
    device = initialize_device(args.device)

    # Initialize W&B
    setup_wandb(
        project=args.wandb_project,
        name=args.run_name,
        config=vars(args),
        entity=args.wandb_entity
    )

    # Tokenizer
    tokenizer = load_tokenizer(args.base_path)

    # Load datasets
    logger.info("\n[1/6] Loading datasets...")
    train_dataset = TargetDataset(args.train_csv, tokenizer=tokenizer)
    val_dataset = TargetDataset(args.val_csv, tokenizer=tokenizer) if args.val_csv else None

    # Load models
    logger.info("\n[2/6] Loading models...")

    # Create diffusion config
    config = DiffusionConfig(
        roformer=RoFormerConfig(
            hidden_size=args.hidden_dim,
            n_layers=args.num_layers,
            n_heads=args.num_heads
        ),
        noise=NoiseConfig(),
        training=TrainingConfig(sampling_eps=args.sampling_eps),
        sampling=SamplingConfig(
            steps=args.total_num_steps,
            sampling_eps=args.sampling_eps
        ),
        eval_cfg=EvalConfig(),
        optim=OptimConfig(lr=args.learning_rate),
        mcts=MCTSConfig()
    )

    # Policy model
    policy_model = Diffusion(
        config=config,
        tokenizer=tokenizer,
        device=device
    ).to(device)

    # Load pretrained checkpoint
    checkpoint = torch.load(args.pretrained_checkpoint, map_location=device, weights_only=False)

    # Handle different checkpoint formats (like v1)
    CHECKPOINT_KEYS = ('state_dict', 'model_state_dict')
    state_dict = None
    for key in CHECKPOINT_KEYS:
        if key in checkpoint:
            state_dict = checkpoint[key]
            logger.info(f"Loading checkpoint from key: {key}")
            break

    if state_dict is None:
        # Assume checkpoint is already a state_dict
        state_dict = checkpoint
        logger.info("Loading checkpoint as direct state_dict")

    policy_model.load_state_dict(state_dict, strict=False)
    logger.info(f"Loaded pretrained checkpoint from {args.pretrained_checkpoint}")

    # Reference model (frozen) — used only for KL regularization toward the prior.
    reference_model = Diffusion(
        config=config,
        tokenizer=tokenizer,
        device=device
    ).to(device)
    reference_model.load_state_dict(state_dict, strict=False)
    reference_model.eval()
    for param in reference_model.parameters():
        param.requires_grad = False
    logger.info("Created reference model (frozen)")

    # Add TD3B sampling method, fix bugs, sampling sequences with w(t) as condition
    policy_model = add_td3b_sampling_to_model(policy_model)

    # Multi-target affinity predictor
    multi_target_affinity = MultiTargetBindingAffinity(
        tokenizer=tokenizer,
        base_path=args.base_path,
        device=device,
        emb_model=policy_model.backbone  # Use backbone Roformer model (matches v1)
    )
    logger.info("Created multi-target binding affinity predictor")

    # Directional oracle (GPCR classifier) — fail fast on missing files.
    for path_label, path in [
        ("direction_oracle_ckpt", args.direction_oracle_ckpt),
        ("direction_oracle_tr2d2_checkpoint", args.direction_oracle_tr2d2_checkpoint),
        ("direction_oracle_tokenizer_vocab", args.direction_oracle_tokenizer_vocab),
        ("direction_oracle_tokenizer_splits", args.direction_oracle_tokenizer_splits),
    ]:
        if not os.path.isfile(path):
            raise FileNotFoundError(f"Missing {path_label}: {path}")

    directional_oracle = DirectionalOracle(
        model_ckpt=args.direction_oracle_ckpt,
        tr2d2_checkpoint=args.direction_oracle_tr2d2_checkpoint,
        tokenizer_vocab=args.direction_oracle_tokenizer_vocab,
        tokenizer_splits=args.direction_oracle_tokenizer_splits,
        esm_name=args.direction_oracle_esm_name,
        d_model=args.direction_oracle_d_model,
        n_heads=args.direction_oracle_n_heads,
        n_self_attn_layers=args.direction_oracle_n_self_attn_layers,
        n_bmca_layers=args.direction_oracle_n_bmca_layers,
        dropout=args.direction_oracle_dropout,
        max_ligand_length=args.direction_oracle_max_ligand_length,
        max_protein_length=args.direction_oracle_max_protein_length,
        device=device,
        esm_cache_dir=args.direction_oracle_esm_cache_dir,
        esm_local_files_only=args.direction_oracle_esm_local_files_only
    )
    directional_oracle.eval()

    # Cache per-target protein encodings so each target is encoded only once.
    protein_token_cache: Dict[str, torch.Tensor] = {}

    def get_protein_tokens(target_seq: str) -> torch.Tensor:
        cached = protein_token_cache.get(target_seq)
        if cached is None:
            cached = directional_oracle.encode_protein(target_seq)
            protein_token_cache[target_seq] = cached
        return cached

    # Loss function
    logger.info("\n[3/6] Creating loss function...")
    td3b_loss_fn = TD3BTotalLoss(
        contrastive_weight=args.contrastive_weight,
        contrastive_margin=args.contrastive_margin,
        kl_beta=args.kl_beta,
        reference_model=reference_model,
        adaptive_margin=True
    )

    # WDCE loss
    from finetune_utils import loss_wdce

    logger.info("\n[4/6] Setting up training...")
    policy_model.train()
    torch.set_grad_enabled(True)
    optimizer = torch.optim.AdamW(policy_model.parameters(), lr=args.learning_rate)

    # Training logs
    batch_losses = []
    batch_wdce_losses = []
    batch_contrastive_losses = []
    batch_kl_losses = []

    # Multi-target buffer
    # We'll store sequences from all sampled targets here
    buffer_sequences = []  # List of (x, log_rnd, reward, directional_label, confidence)
    current_targets = []

    def trim_replay_buffer(items, max_size, strategy):
        # Evict down to max_size; 'fifo' keeps the newest entries,
        # otherwise keep a uniform random subset.
        if max_size <= 0 or len(items) <= max_size:
            return items
        if strategy == "fifo":
            return items[-max_size:]
        indices = np.random.choice(len(items), size=max_size, replace=False)
        return [items[i] for i in indices]

    logger.info(f"\n{SEPARATOR_LINE}")
    logger.info("Starting Training")
    logger.info(f"{SEPARATOR_LINE}\n")

    # Training loop
    pbar = tqdm(range(args.num_epochs))

    for epoch in pbar:
        # Sample new targets if needed
        if epoch % args.resample_targets_every == 0 or len(current_targets) == 0:
            current_targets = train_dataset.sample_targets(
                k=args.targets_per_mcts,
                random_state=epoch
            )
            logger.info(f"\nEpoch {epoch}: Sampled {len(current_targets)} targets for training")

        # MCTS sampling phase (less frequent) - this is when we regenerate sequences
        if epoch % args.resample_every_n_step == 0:
            if args.replay_buffer_size <= 0:
                # Clear buffer only when regenerating with new MCTS if replay is disabled
                buffer_sequences = []
            else:
                logger.info(
                    f"Epoch {epoch}: Replay buffer enabled, keeping {len(buffer_sequences)} sequences before refresh"
                )
            logger.info(f"Epoch {epoch}: Running MCTS for {len(current_targets)} targets...")
            mcts_valid_total = 0
            mcts_run_count = 0
            mcts_empty_runs = 0

            with torch.no_grad():
                for target_seq in current_targets:
                    target_info = train_dataset.get_target_info(target_seq)

                    # Sample both agonist and antagonist
                    for direction_name, d_star in [('agonist', 1.0), ('antagonist', -1.0)]:
                        # Get the target sequence length for this direction
                        target_length = train_dataset.get_sequence_length(target_seq, direction_name)

                        # Temporarily set args.seq_length for this generation
                        original_seq_length = args.seq_length
                        args.seq_length = target_length

                        # Create target-specific affinity predictor for this target
                        target_affinity = TargetSpecificBindingAffinity(multi_target_affinity, target_seq)

                        # Create reward model for this target
                        reward_model = create_reward_function(
                            affinity_predictor=target_affinity,
                            directional_oracle=directional_oracle,
                            target_direction=d_star,
                            target_protein_tokens=get_protein_tokens(target_seq),
                            tokenizer=tokenizer,
                            device=device,
                            min_affinity_threshold=args.min_affinity_threshold,
                            use_confidence_weighting=True,
                            temperature=args.sigmoid_temperature
                        )

                        # Create MCTS using shared utility
                        mcts = create_mcts_instance(
                            args=args,
                            policy_model=policy_model,
                            reward_function=reward_model,
                            tokenizer=tokenizer,
                            buffer_size=args.buffer_size
                        )

                        # Run MCTS
                        reset_tree = (epoch % args.reset_every_n_step == 0)
                        results = mcts.forward(resetTree=reset_tree)

                        # Restore original seq_length
                        args.seq_length = original_seq_length

                        # Unpack results
                        if len(results) == 7:
                            x_final, log_rnd, final_rewards, score_vectors, sequences, directional_labels, confidences = results

                            # Skip if MCTS returned empty buffer (no valid sequences found)
                            if len(x_final) == 0:
                                logger.warning(f"MCTS returned empty buffer for target={target_seq[:20]}, direction={direction_name}")
                                mcts_run_count += 1
                                mcts_empty_runs += 1
                                continue
                            mcts_run_count += 1
                            mcts_valid_total += len(sequences)

                            # Add to buffer
                            for i in range(len(x_final)):
                                buffer_sequences.append({
                                    'x': x_final[i],
                                    'log_rnd': log_rnd[i],
                                    'reward': final_rewards[i],
                                    'directional_label': d_star,
                                    'confidence': confidences[i] if isinstance(confidences, np.ndarray) else 1.0
                                })

            if args.replay_buffer_size > 0:
                buffer_sequences = trim_replay_buffer(
                    buffer_sequences,
                    args.replay_buffer_size,
                    args.replay_buffer_strategy
                )

            logger.info(
                f"Epoch {epoch}: MCTS runs={mcts_run_count}, "
                f"valid_sequences={mcts_valid_total}, empty_runs={mcts_empty_runs}"
            )
            logger.info(f"Epoch {epoch}: Buffer size: {len(buffer_sequences)} sequences")

        # Training phase: sample mini-batches from buffer
        if len(buffer_sequences) == 0:
            logger.warning(f"Epoch {epoch}: Buffer is empty, skipping training")
            continue

        # Shuffle buffer
        np.random.shuffle(buffer_sequences)

        # Mini-batch training
        num_batches = max(1, len(buffer_sequences) // args.train_batch_size)
        epoch_loss = 0.0
        epoch_wdce_loss = 0.0
        epoch_contrastive_loss = 0.0
        epoch_kl_loss = 0.0

        optimizer.zero_grad()

        for batch_idx in range(num_batches):
            start_idx = batch_idx * args.train_batch_size
            end_idx = min(start_idx + args.train_batch_size, len(buffer_sequences))
            batch_data = buffer_sequences[start_idx:end_idx]

            # Pad sequences to the same length (efficient batching for variable-length sequences)
            # Use padding to handle different sequence lengths from different targets
            x_list = [item['x'] for item in batch_data]
            log_rnd_list = [item['log_rnd'] for item in batch_data]  # Scalars, not vectors!

            # Pad x_batch: pad with mask_index (typically 0 or a special token)
            mask_index = policy_model.mask_index if hasattr(policy_model, 'mask_index') else 0
            max_len = max(x.shape[0] for x in x_list)

            # Create padded tensors
            x_batch = torch.full(
                (len(x_list), max_len),
                fill_value=mask_index,
                dtype=x_list[0].dtype,
                device=device
            )

            # Create attention mask: 1 for real tokens, 0 for padding
            # This tells the model which positions are valid vs padded
            attn_mask = torch.zeros(
                (len(x_list), max_len),
                dtype=torch.long,
                device=device
            )

            # Fill in the real sequences and mark valid positions
            for i, x in enumerate(x_list):
                seq_len = x.shape[0]
                x_batch[i, :seq_len] = x.to(device)
                attn_mask[i, :seq_len] = 1  # Mark valid positions

            # log_rnd is a SCALAR per sequence, not a vector - just stack them
            log_rnd_batch = torch.stack([lr.to(device) if isinstance(lr, torch.Tensor) else torch.tensor(lr, device=device) for lr in log_rnd_list])

            directional_labels_batch = torch.tensor(
                [item['directional_label'] for item in batch_data],
                dtype=torch.float32,
                device=device
            )

            # WDCE loss (with attention mask to handle variable-length sequences)
            wdce_loss = loss_wdce(
                policy_model,
                log_rnd_batch,
                x_batch,
                num_replicates=args.wdce_num_replicates,
                centering=args.centering,
                attn_mask=attn_mask  # Pass attention mask to avoid computing loss on padding
            )

            # KL loss: mask each position with probability lamda and compute the
            # KL term at noise level sigma(lamda).
            # BUG FIX: this previously used an undefined name `eps`
            # (NameError at runtime); use the configured sampling epsilon,
            # which matches loss_wdce's eps default (1e-3).
            lamda = torch.rand(x_batch.shape[0], device=device)
            sigma_kl = -torch.log1p(-(1 - args.sampling_eps) * lamda)
            # Reuse the guarded mask_index computed above for padding (consistent
            # with the fallback-to-0 behavior when the attribute is absent).
            masked_index = torch.rand(*x_batch.shape, device=device) < lamda[..., None]
            perturbed_batch = torch.where(masked_index, mask_index, x_batch)
            # Use the actual attention mask (not all ones) to handle variable-length sequences
            attn_mask_kl = attn_mask.to(device)

            kl_loss = td3b_loss_fn.compute_kl_loss(
                policy_model,
                perturbed_batch,
                attn_mask_kl,
                sigma_kl
            )

            # Contrastive loss (if we have multiple directions)
            if len(torch.unique(directional_labels_batch)) > 1:
                embeddings = extract_embeddings_from_mdlm(
                    policy_model,
                    x_batch,
                    pool_method=args.embedding_pool_method
                )

                debug_mode = (epoch < 3) or (epoch > 0 and batch_contrastive_losses and batch_contrastive_losses[-1] < 1e-6)

                total_loss, loss_dict = td3b_loss_fn.compute_loss(
                    wdce_loss,
                    embeddings,
                    directional_labels_batch,
                    kl_loss=kl_loss,
                    debug=debug_mode
                )
            else:
                # Only WDCE + KL if no contrastive
                total_loss = wdce_loss + args.kl_beta * kl_loss
                loss_dict = {
                    'total_loss': total_loss.item(),
                    'wdce_loss': wdce_loss.item(),
                    'contrastive_loss': 0.0,
                    'kl_loss': kl_loss.item()
                }

            # Scale loss for gradient accumulation
            scaled_loss = total_loss / args.gradient_accumulation_steps
            scaled_loss.backward()

            # Accumulate losses
            epoch_loss += loss_dict['total_loss']
            epoch_wdce_loss += loss_dict['wdce_loss']
            epoch_contrastive_loss += loss_dict['contrastive_loss']
            epoch_kl_loss += loss_dict['kl_loss']

            # Gradient accumulation
            if (batch_idx + 1) % args.gradient_accumulation_steps == 0 or (batch_idx + 1) == num_batches:
                if args.grad_clip:
                    torch.nn.utils.clip_grad_norm_(policy_model.parameters(), args.gradnorm_clip)
                optimizer.step()
                optimizer.zero_grad()

        # Average losses
        epoch_loss /= num_batches
        epoch_wdce_loss /= num_batches
        epoch_contrastive_loss /= num_batches
        epoch_kl_loss /= num_batches

        batch_losses.append(epoch_loss)
        batch_wdce_losses.append(epoch_wdce_loss)
        batch_contrastive_losses.append(epoch_contrastive_loss)
        batch_kl_losses.append(epoch_kl_loss)

        # Validation
        if val_dataset is not None and (epoch + 1) % args.validate_every_n_epochs == 0:
            val_metrics = run_validation(
                policy_model,
                multi_target_affinity,
                directional_oracle,
                tokenizer,
                val_dataset,
                args,
                epoch,
                device,
                protein_token_cache=protein_token_cache
            )

            # Log to W&B
            wandb.log({
                "epoch": epoch,
                "val/affinity_mean": val_metrics['affinity_mean'],
                "val/affinity_std": val_metrics['affinity_std'],
                "val/gated_reward_mean": val_metrics['gated_reward_mean'],
                "val/gated_reward_std": val_metrics['gated_reward_std'],
                "val/direction_oracle_mean": val_metrics['direction_oracle_mean'],
                "val/direction_oracle_std": val_metrics['direction_oracle_std'],
                "val/consistency_reward_mean": val_metrics['consistency_reward_mean'],
                "val/consistency_reward_std": val_metrics['consistency_reward_std'],
                "val/consistency_agonist_mean": val_metrics['consistency_agonist_mean'],
                "val/consistency_antagonist_mean": val_metrics['consistency_antagonist_mean'],
                "val/valid_fraction_mean": val_metrics['valid_fraction_mean'],
                "val/direction_accuracy_mean": val_metrics['direction_accuracy_mean'],
                "val/direction_accuracy_std": val_metrics['direction_accuracy_std'],
                "val/success_rate_mean": val_metrics['success_rate_mean'],
                "val/success_rate_std": val_metrics['success_rate_std']
            })

        # Save checkpoint
        if (epoch + 1) % args.save_every_n_epochs == 0:
            model_path = os.path.join(args.save_path, f'model_epoch_{epoch}.ckpt')
            save_model(policy_model, model_path, config=vars(args), epoch=epoch)

    # Final save
    final_model_path = os.path.join(args.save_path, 'model_final.ckpt')
    save_model(policy_model, final_model_path, config=vars(args))

    cleanup_wandb()
    logger.info(f"\n{SEPARATOR_LINE}")
    logger.info("Training completed!")
    logger.info(f"{SEPARATOR_LINE}\n")
+
1059
+
1060
# Script entry point: only run training when executed directly, not on import.
if __name__ == '__main__':
    main()
finetune_utils.py ADDED
@@ -0,0 +1,571 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utility functions for TD3B finetuning and sampling."""
2
+
3
+ import logging
4
+ import os
5
+ import random
6
+ from datetime import datetime
7
+ from pathlib import Path
8
+ from typing import Any, Dict, Optional, Tuple
9
+
10
+ import numpy as np
11
+ import torch
12
+ import torch.distributed as dist
13
+ import torch.nn.functional as F
14
+ import wandb
15
+ from torch.utils.data import DataLoader, TensorDataset
16
+ from tqdm import tqdm
17
+
18
+ from diffusion import Diffusion
19
+ from td3b.td3b_mcts import create_td3b_mcts
20
+ from td3b.td3b_scoring import TD3BRewardFunction
21
+ from tokenizer.my_tokenizers import SMILES_SPE_Tokenizer
22
+ from utils.utils import sample_categorical_logits
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+ # Standard checkpoint keys to try when loading
27
+ CHECKPOINT_KEYS = ("state_dict", "model_state_dict")
28
+
29
+
30
def to_one_hot(x_idx, num_classes=4):
    """Convert an integer index tensor to a float one-hot encoding.

    Args:
        x_idx: Tensor of class indices (any shape, any integer dtype).
        num_classes: Size of the appended one-hot dimension.

    Returns:
        Float tensor of shape ``x_idx.shape + (num_classes,)``.
    """
    return F.one_hot(x_idx.long(), num_classes=num_classes).float()
33
+
34
+
35
def rnd(model, reward_model, batch_size, scale=1, device="cuda:0"):
    r"""
    Run random order sampling and compute the RND $\log\frac{dP^*}{dP^u}$ along the trajectory
    reward_model: r(X)

    return:
    - x: the final samples, [B, D]
    - log_rnd: the log RND along this trajectory, [B]
    """
    # Unwrap a DDP-style wrapper so attributes like `length`/`vocab_size` resolve.
    if hasattr(model, "module"):
        model = model.module

    # Start from the fully-masked state; the mask token is assumed to be the
    # last vocabulary index (vocab_size - 1).
    x = torch.full((batch_size, model.length), model.vocab_size - 1).to(device=device, dtype=torch.int64)
    batch_arange = torch.arange(batch_size, device=device)
    # Each row of jump_pos is a uniformly random permutation of positions:
    # the order in which positions will be unmasked.
    jump_pos = torch.rand(x.shape, device=device).argsort(dim=-1)
    # jump_times, jump_pos = torch.rand(x.shape, device=device).sort(dim=-1)
    # jump_times: Unif[0,1] in increasing order
    # jump_pos: random permutation of range(D)
    log_rnd = torch.zeros(batch_size, device=device)  # [B]
    for d in range(model.length - 1, -1, -1):
        # jump at time jump_times[:, d] at position jump_pos[:, d]
        # NOTE(review): this indexes model(x) directly while `sampling` below
        # calls model.logits(x) — presumably both return per-token (log-)scores
        # with the mask token last; confirm the two entry points agree.
        logits = model(x)[:, :, :-1]  # [B, D, N-1]
        update = sample_categorical_logits(logits[batch_arange, jump_pos[:, d]])  # [B]
        if torch.is_grad_enabled():  # avoid issues with in-place operations
            x = x.clone()
        x[batch_arange, jump_pos[:, d]] = update
        # Accumulate log(uniform reference over N-1 non-mask tokens) minus the
        # model's score of the sampled token at the unmasked position.
        log_rnd += -np.log(model.vocab_size - 1) - logits[batch_arange, jump_pos[:, d], update]
    # Terminal reward term of the log RND: scale * r(X) on the final sample.
    log_rnd += scale * reward_model(x)  # [B]
    return x, log_rnd
64
+
65
+
66
@torch.no_grad()
def sampling(model, batch_size, rounds=1, device="cuda:0"):
    """Any order autoregressive sampling.

    Draws ``rounds`` batches of ``batch_size`` sequences by unmasking positions
    in a uniformly random order, and returns them concatenated as
    a (rounds * batch_size, length) tensor.
    """
    # Unwrap a DDP-style wrapper so attributes like `length`/`vocab_size` resolve.
    if hasattr(model, "module"):
        model = model.module
    batch_arange = torch.arange(batch_size, device=device)
    all_samples = []
    for _ in tqdm(range(rounds), leave=False):
        # Start fully masked; the mask token is the last vocabulary index.
        x = torch.full((batch_size, model.length), model.vocab_size - 1).to(device=device, dtype=torch.int64)
        jump_pos = torch.rand(x.shape, device=device).argsort(dim=-1)
        # jump_times, jump_pos = torch.rand(x.shape, device=device).sort(dim=-1)
        # jump_times: Unif[0,1] in increasing order
        # jump_pos: random permutation of range(D)
        for d in tqdm(range(model.length - 1, -1, -1), leave=False):
            # jump at time jump_times[:, d] at position jump_pos[:, d]
            logits = model.logits(x)[:, :, :-1]  # [B, D, N-1], not log-softmaxed but fine
            update = sample_categorical_logits(logits[batch_arange, jump_pos[:, d]])  # [B]
            x[batch_arange, jump_pos[:, d]] = update
        all_samples.append(x)
    return torch.cat(all_samples)  # (rounds * B, L)
86
+
87
+
88
def loss_ce(log_rnd):
    """Cross entropy loss KL(P^*||P^u).

    Weights each trajectory's log Radon-Nikodym derivative with detached,
    self-normalized importance weights and sums over the batch.
    """
    importance = torch.softmax(log_rnd.detach(), dim=-1)
    return torch.sum(log_rnd * importance)
92
+
93
+
94
def loss_lv(log_rnd):
    r"""Log variance loss Var_{P^\bar{u}}\log\frac{dP^*}{dP^u}.

    Returns the (unbiased, Bessel-corrected) batch variance of the log RND.
    """
    return torch.var(log_rnd)
97
+
98
+
99
def loss_re_rf(log_rnd, const=0):
    r"""Relative entropy loss KL(P^u||P^*) with REINFORCE trick.

    ``const`` acts as a baseline subtracted from the detached log RND to
    reduce gradient variance without changing the gradient in expectation.
    """
    advantage = const - log_rnd.detach()
    return torch.mean(-log_rnd * advantage)
102
+
103
+
104
+ def loss_wdce(
105
+ policy_model,
106
+ log_rnd,
107
+ x,
108
+ num_replicates=16,
109
+ weight_func=lambda l: 1 / l,
110
+ eps=1e-3,
111
+ centering=False,
112
+ attn_mask=None,
113
+ ):
114
+ r"""
115
+ Weighted denoising cross entropy loss
116
+ X_T ~ P^u_T and weights \log\frac{dP^*}{dP^u}(X)
117
+
118
+ log_rnd: [B]; x: [B, L] (no mask)
119
+ num_replicates: R, number of replicates of each row in x
120
+ weight_func: w(lambda) for each sample, 1/lambda by default
121
+ attn_mask: [B, L] attention mask (1 for real tokens, 0 for padding) - IMPORTANT for variable-length sequences
122
+ """
123
+ mask_index = policy_model.mask_index
124
+ if hasattr(policy_model, "module"):
125
+ policy_model = policy_model.module
126
+
127
+ batch = x.repeat_interleave(num_replicates, dim=0) # [B*R, L]
128
+
129
+ batch_weights = log_rnd.detach_().softmax(dim=-1) # [B*R]
130
+ if centering:
131
+ batch_weights = batch_weights - batch_weights.mean(dim=-1, keepdim=True)
132
+
133
+ batch_weights = batch_weights.repeat_interleave(num_replicates, dim=0)
134
+
135
+ lamda = torch.rand(batch.shape[0], device=batch.device) # [B*R]
136
+ lamda_weights = weight_func(lamda).clamp(max=1e5) # [B*R]
137
+
138
+ masked_index = torch.rand(*batch.shape, device=batch.device) < lamda[..., None] # [B*R, D]
139
+ perturbed_batch = torch.where(masked_index, mask_index, batch)
140
+
141
+ # add time conditioning
142
+ t = lamda
143
+ sigma_t = -torch.log1p(-(1 - eps) * t)
144
+
145
+ # Use provided attention mask or create default (all ones for fixed-length)
146
+ if attn_mask is not None:
147
+ attn_mask = attn_mask.repeat_interleave(num_replicates, dim=0).to(policy_model.device)
148
+ else:
149
+ attn_mask = torch.ones_like(perturbed_batch).to(policy_model.device)
150
+
151
+ # compute logits
152
+ logits = policy_model(perturbed_batch, attn_mask=attn_mask, sigma=sigma_t)
153
+ losses = torch.zeros(*batch.shape, device=batch.device, dtype=logits.dtype) # [B*R, D]
154
+ losses[masked_index] = torch.gather(
155
+ input=logits[masked_index], dim=-1, index=batch[masked_index][..., None]
156
+ ).squeeze(-1)
157
+
158
+ # Apply attention mask to exclude padding tokens from loss computation.
159
+ losses = losses * attn_mask
160
+
161
+ return -((losses.sum(dim=-1) * lamda_weights * batch_weights).mean())
162
+
163
+
164
+ def loss_dce(model, x, weight_func=lambda l: 1 / l):
165
+ r"""
166
+ Denoising cross entropy loss, x [B, D] are ground truth samples
167
+ weight_func: w(lambda) for each sample, 1/lambda by default
168
+ """
169
+ lamda = torch.rand(x.shape[0], device=x.device) # [B]
170
+ lamda_weights = weight_func(lamda).clamp(max=1e5) # [B]
171
+ masked_index = torch.rand(*x.shape, device=x.device) < lamda[..., None] # [B, D]
172
+ perturbed_batch = torch.where(masked_index, model.vocab_size - 1, x)
173
+ logits = model(perturbed_batch)
174
+ losses = torch.zeros(*x.shape, device=x.device, dtype=logits.dtype) # [B, D]
175
+ losses[masked_index] = torch.gather(
176
+ input=logits[masked_index], dim=-1, index=x[masked_index][..., None]
177
+ ).squeeze(-1)
178
+ return -((losses.sum(dim=-1) * lamda_weights).mean())
179
+
180
+
181
def load_tokenizer(base_path: str) -> SMILES_SPE_Tokenizer:
    """
    Load the peptide tokenizer from the standard location.

    Args:
        base_path: Base directory containing the ``tr2d2-pep/tokenizer`` tree.

    Returns:
        Loaded SMILES_SPE_Tokenizer instance.

    Raises:
        FileNotFoundError: If either tokenizer file is missing.
    """
    tokenizer_dir = Path(base_path) / "tr2d2-pep" / "tokenizer"
    vocab_path = tokenizer_dir / "new_vocab.txt"
    spe_path = tokenizer_dir / "new_splits.txt"

    # Fail fast with an explicit path in the message if either file is absent.
    if not vocab_path.exists():
        raise FileNotFoundError(f"Vocabulary file not found: {vocab_path}")
    if not spe_path.exists():
        raise FileNotFoundError(f"SPE splits file not found: {spe_path}")

    loaded = SMILES_SPE_Tokenizer(str(vocab_path), str(spe_path))
    logger.info("Loaded tokenizer with vocab_size=%s", loaded.vocab_size)

    return loaded
207
+
208
+
209
def load_checkpoint(
    checkpoint_path: str,
    model: torch.nn.Module,
    device: torch.device,
    strict: bool = True,
) -> Dict[str, Any]:
    """
    Load model weights from checkpoint with automatic key detection.

    Handles different checkpoint formats:
    - {'state_dict': ...}
    - {'model_state_dict': ...}
    - Direct state_dict

    Args:
        checkpoint_path: Path to checkpoint file
        model: Model to load weights into
        device: Device to load checkpoint onto
        strict: Whether to strictly enforce state_dict keys match

    Returns:
        Full checkpoint dictionary (for accessing metadata like epoch, config, etc.)

    Raises:
        FileNotFoundError: If checkpoint file doesn't exist
        RuntimeError: If loading the state dict fails (original error chained)
    """
    if not os.path.exists(checkpoint_path):
        raise FileNotFoundError(f"Checkpoint not found: {checkpoint_path}")

    logger.info("Loading checkpoint from: %s", checkpoint_path)
    checkpoint = torch.load(checkpoint_path, map_location=device)

    # Try to find state_dict under the standard wrapper keys.
    state_dict = None
    for key in CHECKPOINT_KEYS:
        if key in checkpoint:
            state_dict = checkpoint[key]
            logger.info("Found state_dict at checkpoint key: '%s'", key)
            break

    # If not found under a known key, assume the checkpoint IS the state_dict.
    if state_dict is None:
        state_dict = checkpoint
        logger.info("Loading checkpoint as direct state_dict")

    # Keep the try body minimal and chain the cause so the original traceback
    # is preserved (fix: previously `raise RuntimeError(...)` without `from exc`).
    try:
        incompatible_keys = model.load_state_dict(state_dict, strict=strict)
    except Exception as exc:
        raise RuntimeError(f"Failed to load checkpoint: {exc}") from exc

    if not strict and (incompatible_keys.missing_keys or incompatible_keys.unexpected_keys):
        logger.warning("Incompatible keys when loading checkpoint:")
        if incompatible_keys.missing_keys:
            logger.warning("  Missing keys: %s...", incompatible_keys.missing_keys[:5])
        if incompatible_keys.unexpected_keys:
            logger.warning("  Unexpected keys: %s...", incompatible_keys.unexpected_keys[:5])
    else:
        logger.info("Checkpoint loaded successfully")

    return checkpoint
275
+
276
+
277
def initialize_device(device_str: str = "cuda") -> torch.device:
    """
    Resolve a compute device, falling back to CPU when CUDA is unavailable
    or the requested device string/index is invalid.

    Args:
        device_str: Requested device ('cuda', 'cuda:0', 'cpu', or 'auto')

    Returns:
        Torch device object
    """
    requested = device_str
    # 'auto' / None: pick cuda:0 when a CUDA device is actually visible.
    if requested is None or str(requested).lower() == "auto":
        has_cuda = torch.cuda.is_available() and torch.cuda.device_count() > 0
        requested = "cuda:0" if has_cuda else "cpu"

    try:
        device = torch.device(requested)
    except Exception as exc:
        logger.warning("Invalid device '%s': %s. Falling back to CPU.", requested, exc)
        return torch.device("cpu")

    # Non-CUDA devices need no further validation.
    if device.type != "cuda":
        logger.info("Using device: %s", device)
        return device

    if not (torch.cuda.is_available() and torch.cuda.device_count() > 0):
        logger.warning("CUDA requested but not available, falling back to CPU")
        return torch.device("cpu")

    # Clamp an out-of-range CUDA index to cuda:0.
    index = 0 if device.index is None else device.index
    if not (0 <= index < torch.cuda.device_count()):
        logger.warning(
            "CUDA device %s requested but only %d visible; using cuda:0",
            index,
            torch.cuda.device_count(),
        )
        device = torch.device("cuda:0")

    logger.info("Using device: %s (%s)", device, torch.cuda.get_device_name(device.index or 0))
    return device
319
+
320
+
321
def create_output_directory(base_path: str, run_name: str, add_timestamp: bool = True) -> str:
    """
    Create (and return the path of) the results directory for a run.

    The directory is ``<base_path>/tr2d2-pep/results/<run_name>[_<timestamp>]``
    and is created idempotently.

    Args:
        base_path: Base directory.
        run_name: Name for this training run.
        add_timestamp: Append a ``YYYYmmdd_HHMMSS`` suffix when True.

    Returns:
        Path to the created output directory.
    """
    dir_name = run_name
    if add_timestamp:
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        dir_name = f"{run_name}_{stamp}"

    output_dir = os.path.join(base_path, "tr2d2-pep", "results", dir_name)
    os.makedirs(output_dir, exist_ok=True)

    logger.info("Created output directory: %s", output_dir)
    return output_dir
348
+
349
+
350
def save_model(
    model: torch.nn.Module,
    save_path: str,
    config: Optional[Dict[str, Any]] = None,
    epoch: Optional[int] = None,
    optimizer_state: Optional[Dict] = None,
) -> None:
    """
    Save a model checkpoint, attaching whichever metadata was supplied.

    Args:
        model: Model to save.
        save_path: Destination path for the checkpoint file.
        config: Optional configuration dictionary to embed.
        epoch: Optional epoch number.
        optimizer_state: Optional optimizer state dict.
    """
    checkpoint: Dict[str, Any] = {"model_state_dict": model.state_dict()}

    # Only persist metadata entries the caller actually provided.
    optional_entries = (
        ("config", config),
        ("epoch", epoch),
        ("optimizer_state_dict", optimizer_state),
    )
    for key, value in optional_entries:
        if value is not None:
            checkpoint[key] = value

    torch.save(checkpoint, save_path)
    logger.info("Model saved: %s", save_path)
381
+
382
+
383
def setup_wandb(project: str, name: str, config: Dict[str, Any], entity: Optional[str] = None) -> None:
    """
    Start a Weights & Biases run.

    Args:
        project: W&B project name.
        name: Run name.
        config: Configuration dictionary to log with the run.
        entity: Optional W&B team/entity name (omitted when falsy).
    """
    init_kwargs: Dict[str, Any] = {"project": project, "name": name, "config": config}
    if entity:
        init_kwargs["entity"] = entity

    wandb.init(**init_kwargs)
    logger.info("Initialized W&B: project=%s, run=%s", project, name)
407
+
408
+
409
def cleanup_wandb() -> None:
    """Close the active Weights & Biases run and log the shutdown."""
    wandb.finish()
    logger.info("Finished W&B logging")
413
+
414
+
415
def get_mask_index(tokenizer: SMILES_SPE_Tokenizer) -> int:
    """
    Return the mask token id, regardless of which tokenizer API is present.

    Prefers a ``mask_token_id`` attribute; otherwise resolves the mask token
    string through ``convert_tokens_to_ids``. Standardizes mask-index
    retrieval across code paths.
    """
    # EAFP: attempt the direct attribute, fall back to token conversion.
    try:
        return tokenizer.mask_token_id
    except AttributeError:
        return tokenizer.convert_tokens_to_ids(tokenizer.mask_token)
431
+
432
+
433
def create_mcts_instance(
    args,
    policy_model: Diffusion,
    reward_function: TD3BRewardFunction,
    tokenizer: SMILES_SPE_Tokenizer,
    buffer_size: Optional[int] = None,
) -> Any:
    """
    Create TD3B MCTS instance with standardized configuration.

    Args:
        args: Training arguments (``no_mcts``, ``buffer_size``, ``alpha`` are
            read defensively via getattr/hasattr).
        policy_model: Diffusion policy model
        reward_function: TD3B reward function
        tokenizer: Peptide tokenizer
        buffer_size: Optional buffer size (uses args.buffer_size if None)

    Returns:
        TD3B_MCTS instance, or None when MCTS is disabled via ``--no_mcts``.
    """
    if hasattr(args, "no_mcts") and args.no_mcts:
        logger.info("MCTS disabled (--no_mcts flag)")
        return None

    # Get mask index using the standardized helper.
    mask_index = get_mask_index(tokenizer)

    # Use provided buffer_size or fall back to args.
    if buffer_size is None:
        buffer_size = getattr(args, "buffer_size", 50)

    # Resolve alpha once. Fix: the log statement below previously read
    # `args.alpha` directly and raised AttributeError when args lacked it,
    # even though the construction used getattr with a default.
    alpha = getattr(args, "alpha", 0.1)

    mcts = create_td3b_mcts(
        args=args,
        diffusion_model=policy_model,
        td3b_reward_function=reward_function,
        alpha=alpha,
        mask_index=mask_index,
        buffer_size=buffer_size,
        tokenizer=tokenizer,
    )

    logger.info("Created TD3B MCTS (buffer_size=%s, alpha=%s)", buffer_size, alpha)
    return mcts
479
+
480
+
481
def create_reward_function(
    affinity_predictor,
    directional_oracle,
    target_direction: float,
    target_protein_tokens: torch.Tensor,
    tokenizer: SMILES_SPE_Tokenizer,
    device: torch.device,
    min_affinity_threshold: float = 0.0,
    use_confidence_weighting: bool = True,
    temperature: float = 0.1,
) -> TD3BRewardFunction:
    """
    Build a TD3B reward function with the standard parameter wiring.

    Args:
        affinity_predictor: Binding affinity prediction model.
        directional_oracle: Directional prediction oracle.
        target_direction: Target direction (1.0 agonist, -1.0 antagonist).
        target_protein_tokens: Tokenized protein target.
        tokenizer: Peptide tokenizer.
        device: Compute device.
        min_affinity_threshold: Minimum affinity for allosteric control.
        use_confidence_weighting: Whether to use confidence weighting.
        temperature: Temperature for sigmoid gating.

    Returns:
        A configured TD3BRewardFunction.
    """
    reward_kwargs = dict(
        affinity_predictor=affinity_predictor,
        directional_oracle=directional_oracle,
        target_direction=target_direction,
        target_protein_tokens=target_protein_tokens,
        peptide_tokenizer=tokenizer,
        device=device,
        min_affinity_threshold=min_affinity_threshold,
        use_confidence_weighting=use_confidence_weighting,
        temperature=temperature,
    )
    reward_func = TD3BRewardFunction(**reward_kwargs)

    logger.info(
        "Created TD3B reward function (d*=%s, threshold=%s)", target_direction, min_affinity_threshold
    )
    return reward_func
531
+
532
+
533
def log_gpu_memory(stage: str = "") -> None:
    """
    Log current GPU memory usage (no-op when CUDA is unavailable).

    Args:
        stage: Optional stage description appended to the log line.
    """
    if not torch.cuda.is_available():
        return

    gib = 1024 ** 3
    allocated = torch.cuda.memory_allocated() / gib  # GB
    reserved = torch.cuda.memory_reserved() / gib  # GB
    stage_str = f" [{stage}]" if stage else ""
    logger.info(
        "GPU Memory%s: %.2fGB allocated, %.2fGB reserved",
        stage_str,
        allocated,
        reserved,
    )
553
+
554
+
555
def count_parameters(model: torch.nn.Module) -> Tuple[int, int]:
    """
    Count total and trainable parameters of a model.

    Args:
        model: PyTorch model.

    Returns:
        Tuple of (total_params, trainable_params).

    Example:
        >>> total, trainable = count_parameters(model)
        >>> print(f"Total: {total:,}, Trainable: {trainable:,}")
    """
    total = 0
    trainable = 0
    for param in model.parameters():
        count = param.numel()
        total += count
        if param.requires_grad:
            trainable += count
    return total, trainable
inference.py ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ TD3B Inference Script
4
+ Generate directional binders for target proteins using a finetuned TD3B model.
5
+
6
+ Usage:
7
+ python inference.py \
8
+ --ckpt_path checkpoints/td3b.ckpt \
9
+ --val_csv data/test.csv \
10
+ --save_path results/ \
11
+ --seed 42
12
+ """
13
+ import argparse
14
+ import os
15
+ import sys
16
+ import logging
17
+ from typing import Dict, List, Tuple
18
+
19
+ import numpy as np
20
+ import pandas as pd
21
+ import torch
22
+
23
+ ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
24
+ if ROOT_DIR not in sys.path:
25
+ sys.path.insert(0, ROOT_DIR)
26
+
27
+ from diffusion import Diffusion
28
+ from configs.finetune_config import (
29
+ DiffusionConfig, RoFormerConfig, NoiseConfig,
30
+ TrainingConfig, SamplingConfig, EvalConfig, OptimConfig, MCTSConfig,
31
+ )
32
+ from finetune_utils import load_tokenizer, create_reward_function
33
+ from td3b.direction_oracle import DirectionalOracle
34
+ from td3b.td3b_scoring import create_td3b_reward_function
35
+ from utils.app import PeptideAnalyzer
36
+
37
+ logger = logging.getLogger(__name__)
38
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
39
+
40
# ─── Defaults ─────────────────────────────────────────────────────────────────
# Reference hyperparameter defaults for inference.
# NOTE(review): argparse in main() declares its own defaults and this table is
# not referenced anywhere in this script — confirm it is consumed elsewhere.
DEFAULTS = dict(
    seq_length=200,              # maximum binder token length
    sampling_eps=1e-3,           # smallest diffusion timestep
    total_num_steps=128,         # reverse-diffusion steps
    hidden_dim=768,              # backbone hidden size
    num_layers=8,
    num_heads=8,
    alpha=0.1,                   # resampling temperature
    min_affinity_threshold=0.0,  # soft binding-affinity gate threshold
    sigmoid_temperature=0.1,
    num_pool=32,                 # candidate pool size per target/direction
    val_samples_per_target=8,    # samples kept after weighted resampling
)
54
+
55
+
56
def load_model(ckpt_path: str, device: torch.device):
    """Load a finetuned TD3B model from checkpoint.

    Rebuilds the Diffusion model from the config stored in the checkpoint
    (falling back to the standard 768/8/8 RoFormer geometry) and loads the
    weights non-strictly.

    Returns:
        (model, tokenizer) with the model in eval mode on ``device``.
    """
    # weights_only=False: the checkpoint carries a config dict, not just tensors.
    ckpt = torch.load(ckpt_path, map_location=device, weights_only=False)
    # Accept {'model_state_dict': ...}, {'state_dict': ...}, or a raw state_dict.
    state_dict = ckpt.get("model_state_dict") or ckpt.get("state_dict") or ckpt
    config = ckpt.get("config") or {}  # assumes config, when present, is a dict — TODO confirm

    tokenizer = load_tokenizer(ROOT_DIR)

    cfg = DiffusionConfig(
        roformer=RoFormerConfig(
            hidden_size=config.get("hidden_dim", 768),
            n_layers=config.get("num_layers", 8),
            n_heads=config.get("num_heads", 8),
        ),
        noise=NoiseConfig(),
        training=TrainingConfig(sampling_eps=1e-3),
        sampling=SamplingConfig(steps=128, sampling_eps=1e-3),
        eval_cfg=EvalConfig(),
        optim=OptimConfig(lr=3e-4),
        mcts=MCTSConfig(),
    )

    model = Diffusion(config=cfg, tokenizer=tokenizer, device=device).to(device)
    # strict=False: tolerate auxiliary keys (e.g. optimizer or oracle weights).
    model.load_state_dict(state_dict, strict=False)
    model.eval()
    model.tokenizer = tokenizer
    return model, tokenizer
83
+
84
+
85
def sample_sequences(model, batch_size: int, seq_length: int, num_steps: int, eps: float = 1e-5):
    """Sample token sequences from the diffusion model.

    Runs ``num_steps`` reverse-diffusion steps from the model prior, then one
    extra denoising pass to clear any positions still holding the mask token.

    Args:
        model: Diffusion model exposing ``sample_prior``, ``single_reverse_step``,
            ``single_noise_removal``, ``mask_index`` and ``device``.
        batch_size: Number of sequences to draw.
        seq_length: Token length of each sequence.
        num_steps: Number of reverse steps.
        eps: Smallest timestep (integration endpoint).

    Returns:
        LongTensor of token ids — presumably [batch_size, seq_length]; shape
        follows ``sample_prior`` (confirm against the Diffusion class).
    """
    x = model.sample_prior(batch_size, seq_length).to(model.device, dtype=torch.long)
    # Timesteps descend from t=1 to t=eps on a (num_steps + 1)-point grid.
    timesteps = torch.linspace(1, eps, num_steps + 1, device=model.device)
    dt = torch.tensor((1 - eps) / num_steps, device=model.device)

    for i in range(num_steps):
        t = timesteps[i] * torch.ones(x.shape[0], 1, device=model.device)
        _, x = model.single_reverse_step(x, t=t, dt=dt)
        x = x.to(model.device)

    # Remove remaining masks with a final denoising call at the last interior step.
    mask_pos = (x == model.mask_index)
    if mask_pos.any():
        t = timesteps[-2] * torch.ones(x.shape[0], 1, device=model.device)
        _, x = model.single_noise_removal(x, t=t, dt=dt)
        x = x.to(model.device)

    return x
104
+
105
+
106
def score_sequences(reward_model, sequences: List[str]):
    """Score sequences with the TD3B reward function.

    Supports reward models returning either a plain array of rewards or a
    ``(rewards, info)`` tuple whose info dict may carry ``affinities``,
    ``directions`` and ``confidences``.

    Returns:
        Tuple of numpy arrays: (rewards, affinities, directions, confidences).
        Missing fields default to rewards / zeros / ones respectively.
    """
    result = reward_model(sequences)

    if not isinstance(result, tuple):
        # Bare rewards: mirror them as affinities; neutral direction/confidence.
        rewards = np.asarray(result)
        return rewards, rewards, np.zeros_like(rewards), np.ones_like(rewards)

    raw_rewards, info = result
    rewards = np.asarray(raw_rewards)
    affinities = np.asarray(info.get("affinities", rewards))
    directions = np.asarray(info.get("directions", np.zeros_like(rewards)))
    confidences = np.asarray(info.get("confidences", np.ones_like(rewards)))
    return rewards, affinities, directions, confidences
119
+
120
+
121
def main():
    """CLI entry point: for each target in --val_csv and each direction
    (agonist/antagonist), sample a pool of candidates, score them with the
    TD3B reward, resample a subset by softmax(reward / alpha), keep only
    valid peptides, and write a results CSV plus a summary log."""
    parser = argparse.ArgumentParser(description="TD3B Inference")
    parser.add_argument("--ckpt_path", type=str, required=True, help="Path to TD3B checkpoint")
    parser.add_argument("--val_csv", type=str, required=True, help="CSV with Target_Sequence, Ligand_Sequence, label columns")
    parser.add_argument("--save_path", type=str, default="results", help="Output directory")
    parser.add_argument("--device", type=str, default="cuda:0")
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--num_pool", type=int, default=32, help="Pool size for candidate generation")
    parser.add_argument("--val_samples_per_target", type=int, default=8, help="Samples to keep per target-direction")
    parser.add_argument("--resample_alpha", type=float, default=0.1, help="Temperature for weighted resampling")
    parser.add_argument("--direction_oracle_ckpt", type=str, default=None)
    parser.add_argument("--direction_oracle_tr2d2_checkpoint", type=str, default=None)
    args = parser.parse_args()

    # Setup: device fallback, seeding, output directory.
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    os.makedirs(args.save_path, exist_ok=True)

    analyzer = PeptideAnalyzer()

    # Load model
    logger.info(f"Loading model from {args.ckpt_path}")
    model, tokenizer = load_model(args.ckpt_path, device)

    # Load targets; missing optional columns default to "".
    logger.info(f"Loading targets from {args.val_csv}")
    df = pd.read_csv(args.val_csv)
    targets = []
    for _, row in df.iterrows():
        targets.append({
            "target_seq": row["Target_Sequence"],
            "target_uid": row.get("Target_UniProt_ID", ""),
            "binder_seq": row.get("Ligand_Sequence", ""),
            "label": row.get("label", ""),
            # Match generated length to the reference ligand SMILES, capped at 200.
            "seq_length": min(len(row.get("Ligand_SMILES", "x" * 200)), 200),
        })

    # Resolve oracle checkpoints (CLI overrides beat repo-local defaults).
    logger.info("Building reward functions...")
    oracle_ckpt = args.direction_oracle_ckpt or os.path.join(ROOT_DIR, "checkpoints", "direction_oracle.pt")
    oracle_tr2d2 = args.direction_oracle_tr2d2_checkpoint or os.path.join(ROOT_DIR, "checkpoints", "pretrained.ckpt")

    records = []

    for tidx, target in enumerate(targets):
        for d_star, d_name in [(1.0, "agonist"), (-1.0, "antagonist")]:
            logger.info(f"[{tidx+1}/{len(targets)}] Target {target['target_uid']} direction={d_name}")

            # Create reward function.
            # NOTE(review): these keyword arguments (base_path, target_protein_seq,
            # emb_model, ...) do not match the create_reward_function signature in
            # finetune_utils (affinity_predictor, directional_oracle, ...). If they
            # diverge, the resulting TypeError is swallowed by the except below and
            # every target is silently skipped. The unused import
            # create_td3b_reward_function may be the intended factory — confirm.
            try:
                reward_model = create_reward_function(
                    base_path=ROOT_DIR,
                    tokenizer=tokenizer,
                    target_protein_seq=target["target_seq"],
                    target_direction="agonist" if d_star > 0 else "antagonist",
                    device=device,
                    emb_model=model.backbone,
                    directional_oracle_checkpoint=oracle_ckpt,
                    direction_oracle_tr2d2_checkpoint=oracle_tr2d2,
                )
            except Exception as e:
                logger.warning(f"Failed to create reward for {target['target_uid']}: {e}")
                continue

            # Generate pool of candidates at the target-matched length.
            target_length = target.get("seq_length", 200)
            x_pool = sample_sequences(model, args.num_pool, target_length, 128)
            sequences = tokenizer.batch_decode(x_pool)

            # Check validity (decodable as a peptide).
            valid_mask = np.array([analyzer.is_peptide(seq) for seq in sequences])

            # Score all candidates; direction accuracy is relative to d*.
            gated_rewards, affinities, directions, confidences = score_sequences(reward_model, sequences)
            direction_accuracy = ((directions > 0.5).astype(float) if d_star > 0
                                  else (directions < 0.5).astype(float))

            # Weighted resampling (Algorithm 2): softmax over finite rewards.
            finite = np.isfinite(gated_rewards)
            if finite.any():
                rewards_t = torch.as_tensor(gated_rewards[finite], device=device)
                alpha = max(args.resample_alpha, 1e-6)  # guard against division by zero
                weights = torch.softmax(rewards_t / alpha, dim=0)
                idx = torch.multinomial(weights, num_samples=args.val_samples_per_target, replacement=True)
                valid_idx = np.where(finite)[0]
                chosen = valid_idx[idx.cpu().numpy()]
            else:
                # No finite rewards: fall back to the first few candidates.
                chosen = np.arange(min(args.val_samples_per_target, len(sequences)))

            # Save only VALID resampled samples.
            for i in chosen:
                is_valid = bool(valid_mask[i]) if valid_mask.size else False
                if not is_valid:
                    continue  # Skip invalid samples

                records.append({
                    "target": target["target_seq"][:20],
                    "target_uid": target["target_uid"],
                    "sequence": sequences[i],
                    "target_direction": d_star,
                    "direction_name": d_name,
                    "is_valid": True,
                    "affinity": float(affinities[i]),
                    "gated_reward": float(gated_rewards[i]),
                    "direction_oracle": float(directions[i]),
                    "direction_accuracy": float(direction_accuracy[i]),
                })

    # Save results
    out_df = pd.DataFrame(records)
    out_path = os.path.join(args.save_path, f"td3b_results_seed{args.seed}.csv")
    out_df.to_csv(out_path, index=False)

    # Print summary, split by target direction.
    if len(out_df) > 0:
        dp = out_df[out_df["target_direction"] == 1.0]
        dm = out_df[out_df["target_direction"] == -1.0]
        logger.info(f"\n{'='*60}")
        logger.info(f"Results saved to {out_path} ({len(out_df)} valid samples)")
        logger.info(f"  Aff(d*=+1)   = {dp['affinity'].mean():.2f}" if len(dp) else "  No agonist samples")
        logger.info(f"  Aff(d*=-1)   = {dm['affinity'].mean():.2f}" if len(dm) else "  No antagonist samples")
        logger.info(f"  DA(d*=+1)    = {dp['direction_accuracy'].mean():.3f}" if len(dp) else "")
        logger.info(f"  DA(d*=-1)    = {dm['direction_accuracy'].mean():.3f}" if len(dm) else "")
        logger.info(f"  Gated Reward = {out_df['gated_reward'].mean():.2f}")
        logger.info(f"{'='*60}")
    else:
        logger.warning("No valid samples generated.")


if __name__ == "__main__":
    main()
launch_multi_target.sh ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash

# Multi-Target TD3B Training Launch Script
# Trains TD3B on multiple protein targets with random sampling strategy

# ============================================================================
# Configuration
# ============================================================================

# Paths — update these to your local paths
BASE_PATH="/path/to/TD3B"
PRETRAINED_CHECKPOINT="${BASE_PATH}/checkpoints/pretrained.ckpt"
TRAIN_CSV="${BASE_PATH}/data/train.csv"
VAL_CSV="${BASE_PATH}/data/test.csv"  # Optional: create validation split

# Run configuration
RUN_NAME="multi_target_td3b"  # Timestamp will be added automatically
DEVICE="cuda:0"
# Multi-target sampling
TARGETS_PER_MCTS=2            # Number of targets sampled per MCTS round (K)
RESAMPLE_TARGETS_EVERY=1      # Resample targets every N epochs

# Training hyperparameters
NUM_EPOCHS=200
LEARNING_RATE=3e-4
TRAIN_BATCH_SIZE=1            # Small batch size to prevent OOM
GRADIENT_ACCUMULATION_STEPS=32  # Effective batch size = 1 * 32 = 32
RESAMPLE_EVERY=10             # Run MCTS every N epochs
SAVE_EVERY=20
VALIDATE_EVERY=20
RESET_TREE_EVERY=50

# MCTS hyperparameters (aligned with v1, but can reduce for multi-target)
NUM_ITER=20                   # MCTS iterations per resample (v1 default: 50, reduced for multi-target)
NUM_CHILDREN=16               # Children per MCTS expansion
BUFFER_SIZE=50                # Pareto buffer size (v1 default: 50)
REPLAY_BUFFER_SIZE=1000       # Recommended range: 500-5000 (0 disables replay)
REPLAY_BUFFER_STRATEGY="fifo" # fifo or random
ALPHA=0.1                     # Temperature for importance weighting
EXPLORATION=1.0               # UCB exploration constant

# TD3B hyperparameters (aligned with v1 defaults)
CONTRASTIVE_WEIGHT=0.1        # v1 default: 0.1
CONTRASTIVE_MARGIN=1.0
KL_BETA=0.1                   # v1 default: 0.1
MIN_AFFINITY_THRESHOLD=0.0    # CRITICAL: minimum affinity for allosteric control
SIGMOID_TEMPERATURE=0.1

# Validation
VAL_SAMPLES_PER_TARGET=20     # Number of sequences per target during validation

# Directional oracle (GPCR classifier)
ORACLE_CKPT="${BASE_PATH}/checkpoints/direction_oracle.pt"
ORACLE_TR2D2_CHECKPOINT="${BASE_PATH}/checkpoints/pretrained.ckpt"
ORACLE_TOKENIZER_VOCAB="${BASE_PATH}/tokenizer/new_vocab.txt"
ORACLE_TOKENIZER_SPLITS="${BASE_PATH}/tokenizer/new_splits.txt"
ORACLE_ESM_NAME="facebook/esm2_t33_650M_UR50D"
ORACLE_ESM_CACHE_DIR=""       # Optional: set to a cache dir path
ORACLE_ESM_LOCAL_FILES_ONLY=0 # Set to 1 to avoid network access
ORACLE_MAX_LIGAND_LENGTH=768
ORACLE_MAX_PROTEIN_LENGTH=1024
ORACLE_D_MODEL=256
ORACLE_N_HEADS=4
ORACLE_N_SELF_ATTN_LAYERS=1
ORACLE_N_BMCA_LAYERS=2
ORACLE_DROPOUT=0.3

# Optional oracle flags are appended only when configured above.
EXTRA_ORACLE_ARGS=""
if [ -n "$ORACLE_ESM_CACHE_DIR" ]; then
    EXTRA_ORACLE_ARGS="$EXTRA_ORACLE_ARGS --direction_oracle_esm_cache_dir $ORACLE_ESM_CACHE_DIR"
fi
if [ "$ORACLE_ESM_LOCAL_FILES_ONLY" -eq 1 ]; then
    EXTRA_ORACLE_ARGS="$EXTRA_ORACLE_ARGS --direction_oracle_esm_local_files_only"
fi

# W&B (optional)
WANDB_PROJECT="tr2d2-multi-target"
WANDB_ENTITY="phos_zj"

# ============================================================================
# Launch Training
# ============================================================================

cd ${BASE_PATH}

echo "============================================================================"
echo "Multi-Target TD3B Training"
echo "============================================================================"
echo "Configuration:"
echo "  - Targets per MCTS: ${TARGETS_PER_MCTS}"
echo "  - Training batch size: ${TRAIN_BATCH_SIZE}"
echo "  - Gradient accumulation: ${GRADIENT_ACCUMULATION_STEPS}"
echo "  - Effective batch size: $((TRAIN_BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS))"
echo "  - Epochs: ${NUM_EPOCHS}"
echo "  - MCTS iterations: ${NUM_ITER}"
echo "  - MCTS children: ${NUM_CHILDREN}"
echo "  - Buffer size: ${BUFFER_SIZE}"
echo "  - Replay buffer size: ${REPLAY_BUFFER_SIZE} (${REPLAY_BUFFER_STRATEGY})"
echo "============================================================================"
echo ""

# Build command
CMD="python finetune_multi_target.py \
    --base_path ${BASE_PATH} \
    --train_csv ${TRAIN_CSV} \
    --pretrained_checkpoint ${PRETRAINED_CHECKPOINT} \
    --run_name ${RUN_NAME} \
    --device ${DEVICE} \
    \
    --targets_per_mcts ${TARGETS_PER_MCTS} \
    --resample_targets_every ${RESAMPLE_TARGETS_EVERY} \
    \
    --num_epochs ${NUM_EPOCHS} \
    --learning_rate ${LEARNING_RATE} \
    --train_batch_size ${TRAIN_BATCH_SIZE} \
    --gradient_accumulation_steps ${GRADIENT_ACCUMULATION_STEPS} \
    --resample_every_n_step ${RESAMPLE_EVERY} \
    --save_every_n_epochs ${SAVE_EVERY} \
    --validate_every_n_epochs ${VALIDATE_EVERY} \
    --reset_every_n_step ${RESET_TREE_EVERY} \
    \
    --num_iter ${NUM_ITER} \
    --num_children ${NUM_CHILDREN} \
    --buffer_size ${BUFFER_SIZE} \
    --replay_buffer_size ${REPLAY_BUFFER_SIZE} \
    --replay_buffer_strategy ${REPLAY_BUFFER_STRATEGY} \
    --alpha ${ALPHA} \
    --exploration ${EXPLORATION} \
    \
    --contrastive_weight ${CONTRASTIVE_WEIGHT} \
    --contrastive_margin ${CONTRASTIVE_MARGIN} \
    --kl_beta ${KL_BETA} \
    --min_affinity_threshold ${MIN_AFFINITY_THRESHOLD} \
    --sigmoid_temperature ${SIGMOID_TEMPERATURE} \
    \
    --direction_oracle_ckpt ${ORACLE_CKPT} \
    --direction_oracle_tr2d2_checkpoint ${ORACLE_TR2D2_CHECKPOINT} \
    --direction_oracle_tokenizer_vocab ${ORACLE_TOKENIZER_VOCAB} \
    --direction_oracle_tokenizer_splits ${ORACLE_TOKENIZER_SPLITS} \
    --direction_oracle_esm_name ${ORACLE_ESM_NAME} \
    --direction_oracle_max_ligand_length ${ORACLE_MAX_LIGAND_LENGTH} \
    --direction_oracle_max_protein_length ${ORACLE_MAX_PROTEIN_LENGTH} \
    --direction_oracle_d_model ${ORACLE_D_MODEL} \
    --direction_oracle_n_heads ${ORACLE_N_HEADS} \
    --direction_oracle_n_self_attn_layers ${ORACLE_N_SELF_ATTN_LAYERS} \
    --direction_oracle_n_bmca_layers ${ORACLE_N_BMCA_LAYERS} \
    --direction_oracle_dropout ${ORACLE_DROPOUT} \
    ${EXTRA_ORACLE_ARGS} \
    \
    --val_samples_per_target ${VAL_SAMPLES_PER_TARGET} \
    \
    --grad_clip \
    --gradnorm_clip 1.0 \
    --wandb_project ${WANDB_PROJECT}"

# Add validation CSV if it exists
if [ -f "${VAL_CSV}" ]; then
    CMD="${CMD} --val_csv ${VAL_CSV}"
    echo "Validation CSV: ${VAL_CSV}"
else
    echo "No validation CSV found (${VAL_CSV})"
    echo "Skipping validation during training"
fi

# Add W&B entity if specified
if [ -n "${WANDB_ENTITY}" ]; then
    CMD="${CMD} --wandb_entity ${WANDB_ENTITY}"
fi

echo ""
echo "Launching training..."
echo ""

# Execute
eval $CMD
noise_schedule.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import abc
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+
6
+ torch._C._jit_set_profiling_mode(False)
7
+ torch._C._jit_set_profiling_executor(False)
8
+ torch._C._jit_override_can_fuse_on_cpu(True)
9
+ torch._C._jit_override_can_fuse_on_gpu(True)
10
+
11
def get_noise(config, dtype=torch.float32):
    """Factory mapping ``config.noise.type`` to a Noise schedule instance.

    Args:
        config: object with a ``noise`` namespace exposing ``type`` and,
            for 'geometric'/'linear', ``sigma_min`` / ``sigma_max``.
        dtype: tensor dtype, used only by the 'linear' schedule.

    Returns:
        A Noise subclass instance.

    Raises:
        ValueError: for an unrecognized noise type.
    """
    noise_type = config.noise.type
    if noise_type == 'geometric':
        return GeometricNoise(config.noise.sigma_min, config.noise.sigma_max)
    elif noise_type == 'loglinear':
        return LogLinearNoise()
    elif noise_type == 'cosine':
        return CosineNoise()
    elif noise_type == 'cosinesqr':
        return CosineSqrNoise()
    elif noise_type == 'linear':
        return Linear(config.noise.sigma_min, config.noise.sigma_max, dtype)
    elif noise_type == 'logpoly':
        # Fix: LogPolyNoise is defined in this module but was previously
        # unreachable from the factory.
        return LogPolyNoise()
    else:
        raise ValueError(f'{noise_type} is not a valid noise')
24
+
25
+
26
def binary_discretization(z):
    """Straight-through sign discretization.

    Forward pass yields sign(z); the backward pass sees the L2-normalized
    soft value, so gradients flow through the normalization.
    """
    hard = torch.sign(z)
    soft = z / torch.norm(z, dim=-1, keepdim=True)
    # Straight-through estimator: value of `hard`, gradient of `soft`.
    return soft + (hard - soft).detach()
30
+
31
+
32
class Noise(abc.ABC, nn.Module):
    """
    Abstract base for noise schedules.

    Subclasses provide ``total_noise(t)`` (accumulated noise up to t) and
    ``rate_noise(t)`` (its instantaneous rate); ``forward`` returns both.
    """
    def forward(self, t):
        # Assume time goes from 0 to 1
        return self.total_noise(t), self.rate_noise(t)
39
+
40
+
41
class CosineNoise(Noise):
    """Cosine schedule: total noise is -log(eps + (1 - eps) * cos(pi*t/2))."""

    def __init__(self, eps=1e-3):
        super().__init__()
        self.eps = eps

    def rate_noise(self, t):
        # Time derivative of total_noise.
        half_pi = torch.pi / 2
        numer = (1 - self.eps) * torch.sin(t * half_pi)
        denom = (1 - self.eps) * torch.cos(t * half_pi) + self.eps
        return half_pi * numer / denom

    def total_noise(self, t):
        damped_cos = self.eps + (1 - self.eps) * torch.cos(t * torch.pi / 2)
        return -torch.log(damped_cos)
55
+
56
+
57
class CosineSqrNoise(Noise):
    """Squared-cosine schedule: total noise is -log(eps + (1 - eps) * cos^2(pi*t/2))."""

    def __init__(self, eps=1e-3):
        super().__init__()
        self.eps = eps

    def rate_noise(self, t):
        # Time derivative of total_noise; sin(pi*t) arises from the chain rule
        # on cos^2(pi*t/2).
        half_pi = torch.pi / 2
        numer = (1 - self.eps) * torch.sin(t * torch.pi)
        denom = (1 - self.eps) * (torch.cos(t * half_pi) ** 2) + self.eps
        return half_pi * numer / denom

    def total_noise(self, t):
        damped_cos_sq = self.eps + (1 - self.eps) * (torch.cos(t * torch.pi / 2) ** 2)
        return -torch.log(damped_cos_sq)
72
+
73
+
74
class Linear(Noise):
    """Linear schedule: total noise runs from sigma_min at t=0 to sigma_max at t=1."""

    def __init__(self, sigma_min=0, sigma_max=10, dtype=torch.float32):
        super().__init__()
        self.sigma_min = torch.tensor(sigma_min, dtype=dtype)
        self.sigma_max = torch.tensor(sigma_max, dtype=dtype)

    def rate_noise(self, t=None):
        # Fix: the base class calls rate_noise(t), but this method previously
        # took no `t` and raised TypeError from Noise.forward. The rate of a
        # linear schedule is constant, so `t` is accepted and ignored
        # (optional, to stay compatible with any zero-argument callers).
        return self.sigma_max - self.sigma_min

    def total_noise(self, t):
        return self.sigma_min + t * (self.sigma_max - self.sigma_min)

    def importance_sampling_transformation(self, t):
        # Warp a uniform t so that sampled sigmas follow the importance
        # distribution (same transform family as LogLinearNoise).
        f_T = torch.log1p(- torch.exp(- self.sigma_max))
        f_0 = torch.log1p(- torch.exp(- self.sigma_min))
        sigma_t = - torch.log1p(- torch.exp(t * f_T + (1 - t) * f_0))
        return (sigma_t - self.sigma_min) / (
            self.sigma_max - self.sigma_min)
92
+
93
+
94
class GeometricNoise(Noise):
    """Geometric interpolation between sigma_min (t=0) and sigma_max (t=1)."""

    def __init__(self, sigma_min=1e-3, sigma_max=1):
        super().__init__()
        # 1.0 * ... forces a floating-point tensor even for int endpoints.
        self.sigmas = 1.0 * torch.tensor([sigma_min, sigma_max])

    def rate_noise(self, t):
        lo, hi = self.sigmas[0], self.sigmas[1]
        # d/dt of lo^(1-t) * hi^t.
        return lo ** (1 - t) * hi ** t * (hi.log() - lo.log())

    def total_noise(self, t):
        lo, hi = self.sigmas[0], self.sigmas[1]
        return lo ** (1 - t) * hi ** t
105
+
106
+
107
class LogLinearNoise(Noise):
    """Log Linear noise schedule.

    Built so that 1 - exp(-total_noise(t)) interpolates between 0 and ~1 as
    t goes from 0 to 1. Total noise is -log(1 - (1 - eps) * t), so the
    masking probability is (1 - eps) * t.
    """

    def __init__(self, eps=1e-3):
        super().__init__()
        self.eps = eps
        self.sigma_max = self.total_noise(torch.tensor(1.0))
        self.sigma_min = self.eps + self.total_noise(torch.tensor(0.0))

    def rate_noise(self, t):
        # Time derivative of total_noise.
        scale = 1 - self.eps
        return scale / (1 - scale * t)

    def total_noise(self, t):
        return -torch.log1p(-(1 - self.eps) * t)

    def importance_sampling_transformation(self, t):
        # Warp a uniform t through the schedule so sigmas are sampled per the
        # importance distribution, then map back to a time value.
        f_T = torch.log1p(-torch.exp(-self.sigma_max))
        f_0 = torch.log1p(-torch.exp(-self.sigma_min))
        sigma_t = -torch.log1p(-torch.exp(t * f_T + (1 - t) * f_0))
        return -torch.expm1(-sigma_t) / (1 - self.eps)
+ return t
133
+
134
class LogPolyNoise(Noise):
    """
    Log Polynomial noise schedule for slower masking of peptide bond tokens
    (cubic variant: total noise is -log(1 - (1 - eps) * t**3)).
    """
    def __init__(self, eps=1e-3):
        super().__init__()
        self.eps = eps
        self.sigma_max = self.total_noise(torch.tensor(1.0))
        self.sigma_min = self.eps + self.total_noise(torch.tensor(0.0))

    def rate_noise(self, t):
        # derivative of -log(1-t^w)
        # NOTE(review): the exact derivative of total_noise would be
        # 3*(1-eps)*t**2 / (1 - (1-eps)*t**3); the numerator here is
        # (3*t**2 - eps), which differs (and is negative at t=0) —
        # confirm this deviation is intentional.
        return ((3 * (t**2)) - self.eps) / (1 - (1 - self.eps) * (t**3))

    def total_noise(self, t):
        # -log(1-t^w)
        return -torch.log1p(-(1 - self.eps) * (t**3))
peptide_mcts.py ADDED
@@ -0,0 +1,676 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+ import torch.nn.functional as F
4
+ import numpy as np
5
+ import random as rd
6
+ from utils.app import PeptideAnalyzer
7
+ from utils.timer import StepTimer
8
+ from scoring.scoring_functions import ScoringFunctions
9
+
10
+ import noise_schedule
11
+
12
+ ### for peptide multi-objective ###
13
def dominates(a, b):
    """Return True iff score vector ``a`` Pareto-dominates ``b``.

    ``a`` dominates ``b`` when it is at least as good in every
    objective and strictly better in at least one (maximization).
    """
    a_arr = np.asarray(a)
    b_arr = np.asarray(b)
    no_worse_anywhere = np.all(a_arr >= b_arr)
    strictly_better_somewhere = np.any(a_arr > b_arr)
    return no_worse_anywhere and strictly_better_somewhere
16
+
17
def dominated_by(a, b):
    """Return True iff score vector ``a`` is Pareto-dominated by ``b``.

    Equivalent to ``dominates(b, a)``: ``b`` is no worse in every
    objective and strictly better in at least one.
    """
    a_arr = np.asarray(a)
    b_arr = np.asarray(b)
    return np.all(b_arr >= a_arr) and np.any(b_arr > a_arr)
19
+
20
+
21
def updateParetoFront(paretoFront, node, scoreVector, totalSize=None, eps=1e-12):
    """
    Maintain a non-dominated set (Pareto front) of (node -> scoreVector).

    - Accept 'node' iff it is NOT dominated by any node in the set.
    - Remove any incumbent nodes that ARE dominated by 'node'.
    - If totalSize is given and the archive exceeds it, drop the item
      with the smallest sum(scoreVector) as a simple tie-breaker.

    Args:
        paretoFront (dict): {node: scoreVector}
        node: candidate node (used as dict key)
        scoreVector (array-like): candidate scores (to be maximized)
        totalSize (int|None): optional max size for the archive
        eps (float): tolerance for the dominance comparisons

    Returns:
        dict: updated paretoFront
    """
    s = np.asarray(scoreVector, dtype=float)

    def _dominates(a, b):
        # a >= b in all coords and > in at least one (with tolerance eps)
        return np.all(a >= b - eps) and np.any(a > b + eps)

    # Reject if the candidate is dominated by any node already in the set.
    for v in paretoFront.values():
        if _dominates(np.asarray(v, dtype=float), s):
            return paretoFront  # no change

    # Keep only incumbents that the candidate does not dominate.
    # (Removed: dead commented-out handling for exact duplicates and the
    # unused `equal` helper — duplicates are simply re-inserted as before.)
    survivors = {}
    for k, v in paretoFront.items():
        v_arr = np.asarray(v, dtype=float)
        if _dominates(s, v_arr):
            continue  # drop dominated incumbent
        survivors[k] = v_arr

    # Insert the candidate.
    survivors[node] = s

    # Enforce the optional capacity limit: drop the entry with the
    # smallest objective sum as a simple tie-breaker.
    if totalSize is not None and totalSize > 0 and len(survivors) > totalSize:
        keys = list(survivors.keys())
        sums = np.array([np.sum(survivors[k]) for k in keys])
        del survivors[keys[int(np.argmin(sums))]]

    return survivors
83
+
84
+ ### BEGINNING OF NODE CLASS ###
85
+
86
class Node:
    """
    Node class: a partially unmasked sequence in the MCTS tree.

    Attributes:
        parentNode: Node object at the previous time step (None for root).
        childNodes: list of Node objects generated by sampling distinct unmasking steps.
        totalReward: vector of cumulative rewards for all K objectives.
        visits: number of times the node has been visited by an iteration.
        timestep: time step at which the sequence was sampled (0..args.total_num_steps).
        tokens: dict with 'seqs' (token tensor) and 'attention_mask'.
    """
    def __init__(self, args, tokens=None, log_rnd=None, log_policy_step=None, log_pretrained_step=None, parentNode=None, childNodes=None, totalReward=None, timestep=None):
        self.args = args
        self.parentNode = parentNode
        # Fresh list per node (avoids a shared mutable default).
        self.childNodes = [] if childNodes is None else childNodes

        self.log_rnd = log_rnd  # accumulated log RN-weight of the path up to this node

        #self.log_p0 = 0 # stores the log probabiltiy of the unmasking step from the previous iteration
        self.log_policy_step = log_policy_step  # log-prob of the unmasking step under the current policy
        self.log_pretrained_step = log_pretrained_step  # log-prob of the same step under the pretrained model

        # Initialize total rewards to the reward of the rolled-out unmasked sequence,
        # or to zeros when no reward is known yet.
        if totalReward is not None:
            self.totalReward = totalReward  # potential reward of the node based on generated children
        else:
            self.totalReward = np.zeros(self.args.num_obj)

        # Set initial visits to 1.
        self.visits = 1

        # Set timestep (value between 0 and num_steps).
        self.timestep = timestep

        # Dict with 'seqs' as token array and 'attention_mask'.
        self.tokens = tokens

    def selectNode(self):
        """
        Select a child to move to, sampling uniformly from the Pareto front
        of the children's vector-valued select scores.

        Returns:
            (Node, int): selected node and its expand status; returns
            (self, status) unchanged when this node is not a legal non-leaf.
        """
        # extract the status of the current node
        nodeStatus = self.getExpandStatus()

        # if the node is a legal non-leaf node
        if (nodeStatus == 3):
            # Pareto front over the select scores of the eligible children.
            paretoFront = {}

            for childNode in self.childNodes:
                childStatus = childNode.getExpandStatus()
                # only append child if it is legal leaf node (expandable) or legal non-leaf node
                if childStatus == 2 or childStatus == 3:
                    selectScore = childNode.calcSelectScore()
                    paretoFront = updateParetoFront(paretoFront, childNode, selectScore)

            # NOTE(review): rd.choice raises IndexError if no child is
            # selectable (empty Pareto front) — confirm callers guarantee
            # at least one eligible child when status == 3.
            selected = rd.choice(list(paretoFront.keys()))

            # return selected child node and status
            return selected, selected.getExpandStatus()

        # if node is not valid non-leaf node
        return self, nodeStatus

    def addChildNode(self, tokens, log_rnd, log_policy_step, log_pretrained_step, totalReward):
        """
        Add a child node one timestep after this node.

        Args:
            tokens: dict with 'seqs' and 'attention_mask' for the child sequence.
            log_rnd: log RN-weight of the path up to the added child node.
            log_policy_step: scalar log-prob of sampling the step under the policy.
            log_pretrained_step: scalar log-prob of sampling the step under the pretrained model.
            totalReward: initial reward vector for the child.

        Returns:
            Node: the newly created child (also appended to self.childNodes).
        """
        child = Node(args=self.args,
                     tokens=tokens,
                     log_rnd = log_rnd,
                     log_policy_step=log_policy_step,
                     log_pretrained_step=log_pretrained_step,
                     parentNode=self,
                     childNodes=[],
                     totalReward=totalReward,
                     timestep=self.timestep+1)

        self.childNodes.append(child)
        return child

    def update_logrnd(self, log_policy_step, log_rnd):
        # Refresh the cached per-step policy log-prob and path log RN-weight
        # (recomputed under the current policy during selection).
        self.log_policy_step = log_policy_step
        self.log_rnd = log_rnd

    def updateNode(self, rewards):
        """
        Update the cumulative rewards vector with the reward vector from a
        descendant leaf node, and increment the visit count.
        """
        self.visits += 1

        self.totalReward += rewards  # elementwise add over the K objectives

    def calcSelectScore(self):
        """
        Calculate the vector-valued select score from the cumulative rewards
        and visit counts: mean reward per objective plus a UCT-like
        exploration bonus weighted by the step's policy log-prob.
        """
        scaling = 0.1  # scaling of the exploration term in the select score

        # K-dimensional vector of normalized rewards for each objective
        normRewards = self.totalReward / self.visits

        # Exploration bonus scaled by the step's policy log-prob and the
        # parent's visit count (UCT-style).
        return normRewards + (scaling * self.log_policy_step.detach().cpu().item() * np.sqrt(self.parentNode.visits) / self.visits)

    def getExpandStatus(self):
        """
        Returns an integer indicating whether the node is a:
        1. terminal node (sequence is fully unmasked)
        2. legal leaf node (partially unmasked sequence that can be expanded)
        3. legal non-leaf node (already expanded sequence with child nodes)
        """
        if self.timestep == self.args.total_num_steps:
            return 1
        elif (self.timestep < self.args.total_num_steps) and (len(self.childNodes) == 0):
            return 2
        return 3
211
+
212
+ ### END OF NODE CLASS ###
213
+
214
+ ### BEGINNING OF MCTS CLASS ###
215
+
216
class MCTS:
    """
    Monte-Carlo tree search over partial unmasking trajectories of a
    discrete diffusion (MDLM) policy, collecting high-reward peptide
    sequences into a Pareto-filtered replay buffer.
    """
    def __init__(
        self,
        args,
        config,
        policy_model,
        pretrained,
        score_func_names=None,
        prot_seqs=None,
        rootNode=None,
        reward_func=None,
        num_obj=None,
    ):
        self.timer = StepTimer(policy_model.device)

        self.device = policy_model.device

        self.args = args
        self.config = config
        self.noise = noise_schedule.get_noise(config)
        self.time_conditioning = args.time_conditioning

        # Resolve the number of objectives: explicit arg, then the reward
        # function's own num_obj, then the number of score function names.
        if score_func_names is None:
            score_func_names = []
        if num_obj is None:
            num_obj = getattr(reward_func, "num_obj", None)
        self.num_obj = num_obj if num_obj is not None else len(score_func_names)

        # Root starts as a fully masked sequence of length args.seq_length.
        self.mask_index = policy_model.mask_index
        masked_seq = torch.ones((self.args.seq_length), device = self.device) * self.mask_index
        masked_tokens = {'seqs': masked_seq.to(dtype=torch.long), 'attention_mask': torch.ones_like(masked_seq).to(self.device)}
        if rootNode is None:
            self.rootNode = Node(self.args, tokens = masked_tokens,
                        log_rnd=torch.zeros((), device=self.device),
                        log_policy_step=torch.zeros((), device=self.device),
                        log_pretrained_step=torch.zeros((), device=self.device),
                        totalReward=np.zeros(self.num_obj), timestep=0)
        else:
            self.rootNode = rootNode # stores the root node of the tree

        # Replay buffer of dicts:
        # "x_final": final unmasked token tensor
        # "log_rnd": log RN-weight of the trajectory (incl. reward term)
        # "final_reward": scalarized reward
        # "score_vector": per-objective scores
        # "seq": decoded string sequence
        self.buffer = [] # List[Dict[str, Any]]

        self.buffer_size = args.buffer_size

        self.num_steps = args.total_num_steps
        #self.num_sequences = args.num_sequences

        # pretrained model
        self.pretrained = pretrained

        # the policy model that we want to finetune
        self.policy_model = policy_model
        #self.tokenizer = policy_model.tokenizer
        self.device = policy_model.device

        self.sequence_length = args.seq_length

        self.num_iter = args.num_iter

        self.num_children = args.num_children

        # Scoring functions: build from names unless a reward callable is given.
        if reward_func is None:
            self.rewardFunc = ScoringFunctions(score_func_names, prot_seqs, device=args.device)
        else:
            self.rewardFunc = reward_func

        self.iter_num = 0

        self.reward_log = [] # stores scalarized total rewards
        self.logrnd_log = []
        # per-objective logs, appended once per expansion
        self.valid_fraction_log = []
        self.affinity1_log = []
        self.affinity2_log = []
        self.permeability_log = []
        self.sol_log = []
        self.hemo_log = []
        self.nf_log = []

        # Both models are used in inference mode during search.
        self.policy_model.eval()
        self.pretrained.eval()

        # for peptides: validity checking and decoding
        self.analyzer = PeptideAnalyzer()
        self.tokenizer = policy_model.tokenizer


    def reset(self, resetTree):
        """Clear the buffer and logs; optionally rebuild a fresh root node."""
        self.iter_num = 0
        self.buffer = []
        self.reward_log = []
        self.logrnd_log = []

        # reset logs for each objective
        self.valid_fraction_log = []
        self.affinity1_log = []
        self.affinity2_log = []
        self.permeability_log = []
        self.sol_log = []
        self.hemo_log = []
        self.nf_log = []

        # add option to continue with the same tree
        if resetTree:
            masked_seq = torch.ones((self.args.seq_length), device = self.device) * self.mask_index
            masked_tokens = {'seqs': masked_seq.to(dtype=torch.long), 'attention_mask': torch.ones_like(masked_seq).to(self.device)}
            self.rootNode = Node(self.args, tokens = masked_tokens,
                        log_rnd=torch.zeros((), device=self.device),
                        log_policy_step=torch.zeros((), device=self.device),
                        log_pretrained_step=torch.zeros((), device=self.device),
                        totalReward=np.zeros(self.num_obj), timestep=0)

    def forward(self, resetTree=False):
        """
        Run num_iter select/expand iterations and return the consolidated
        buffer: (x_final, log_rnd, final_rewards, score_vectors, sequences).
        """
        self.reset(resetTree)

        while (self.iter_num < self.num_iter):
            self.iter_num += 1

            # traverse the tree from the root node until a leaf node
            with self.timer.section("select"):
                leafNode, _ = self.select(self.rootNode)

            # expand leaf node into num_children partially unmasked sequences at the next timestep
            with self.timer.section("expand"):
                self.expand(leafNode)

        final_x, log_rnd, final_rewards, score_vectors, sequences = self.consolidateBuffer()
        # return final_seqs (B, L), log_rnd (B, ), and final rewards (B, )

        rows = self.timer.summary()
        print("\n=== Timing summary (by total time) ===")
        for name, cnt, total, mean, p50, p95 in rows:
            print(f"{name:30s} n={cnt:5d} total={total:8.3f}s mean={mean*1e3:7.2f}ms "
                  f"p50={p50*1e3:7.2f}ms p95={p95*1e3:7.2f}ms")

        return final_x, log_rnd, final_rewards, score_vectors, sequences

    # new updateBuffer
    def _debug_buffer_decision(self, sv, reason, extra=None):
        # Debug print of buffer accept/reject decisions.
        if extra is None: extra = {}
        print(f"[BUFFER] reason={reason} sv={np.round(sv,4)} "
              f"buf_len={len(self.buffer)} extra={extra}")

    def updateBuffer(self, x_final, log_rnd, score_vectors, childSequences):
        """
        Insert finished trajectories into the Pareto-filtered buffer.

        Returns:
            (Tensor, np.ndarray): stacked per-trajectory log RN-weights
            (reward-tilted) and scalarized rewards, over all B inputs.
        """
        B = x_final.shape[0]
        traj_log_rnds, scalar_rewards = [], []

        for i in range(B):
            sv = np.asarray(score_vectors[i], dtype=float)

            # determine how to scalarize the multi-objective rewards
            # NOTE(review): the "normalized" and "weighted" branches are
            # unimplemented (pass), leaving scalar_reward unset on the first
            # iteration for those settings — confirm only the default
            # (sum) scalarization is used, or implement these branches.
            if self.args.scalarization == "normalized":
                pass
            elif self.args.scalarization == "weighted":
                pass
            else:
                scalar_reward = float(np.sum(sv))

            traj_log_rnd = log_rnd[i] + (scalar_reward / self.args.alpha) # scale down by alpha

            item = {
                "x_final": x_final[i].clone(), # clone?
                "log_rnd": traj_log_rnd.clone(),
                "final_reward": scalar_reward,
                "score_vector": sv.copy(),
                "seq": childSequences[i],
            }

            # Drop if dominated by any existing
            if any(dominated_by(sv, bi["score_vector"]) for bi in self.buffer):
                # for debugging
                self._debug_buffer_decision(sv, "rejected_dominated")
                continue

            # Remove any existing that this candidate dominates
            keep = []
            for bi in self.buffer:
                if not dominates(sv, bi["score_vector"]):
                    keep.append(bi)
            self.buffer = keep

            # Insert with capacity rule
            if len(self.buffer) < self.buffer_size:
                self.buffer.append(item)
            else:
                # tie-breaker: replace the worst by a simple heuristic (min sum)
                worst_i = int(np.argmin([np.sum(bi["score_vector"]) for bi in self.buffer]))
                self.buffer[worst_i] = item

            # for debugging
            self._debug_buffer_decision(sv, "inserted", {"new_len": len(self.buffer)})

            traj_log_rnds.append(traj_log_rnd)
            scalar_rewards.append(scalar_reward)

        traj_log_rnds = torch.stack(traj_log_rnds, dim=0) if traj_log_rnds else torch.empty(0)
        scalar_rewards = np.asarray(scalar_rewards, dtype=float)
        return traj_log_rnds, scalar_rewards

    def consolidateBuffer(self):
        """
        Return the buffer contents as stacked tensors/arrays:
        x_final (B, L), log_rnd (B,), final_rewards (B,),
        score_vectors (B, K), sequences (list of str).
        """
        x_final = []
        log_rnd = []
        final_rewards = []
        score_vectors = []
        sequences = []
        for item in self.buffer:
            x_final.append(item["x_final"])
            log_rnd.append(item["log_rnd"])
            final_rewards.append(item["final_reward"])
            score_vectors.append(item["score_vector"])
            sequences.append(item["seq"])

        x_final = torch.stack(x_final, dim=0) # (B, L)
        log_rnd = torch.stack(log_rnd, dim=0).to(dtype=torch.float32) # (B)
        final_rewards = np.stack(final_rewards, axis=0).astype(np.float32)
        score_vectors = np.stack(score_vectors, axis=0).astype(np.float32)

        return x_final, log_rnd, final_rewards, score_vectors, sequences


    def isPathEnd(self, path, maxDepth):
        """
        Check whether the last element of `path` is completely unmasked
        (end of trajectory) or the path has reached maxDepth.
        """
        if (path[-1] != self.mask_index).all():
            return True
        elif len(path) >= maxDepth:
            return True
        return False

    def select(self, currNode, eps=1e-5):
        """
        Traverse the tree from the root until reaching a legal leaf node,
        recomputing each step's log-prob (and the accumulated log RN-weight)
        under the current policy along the way.
        """
        updated_log_rnd = torch.zeros((), device=self.device)
        while True:
            currNode, nodeStatus = currNode.selectNode()

            if currNode.parentNode is not None:
                # recompute the step log-prob under the current policy
                child_tokens = currNode.tokens['seqs'].to(self.device)
                attn_mask = currNode.tokens['attention_mask'].to(self.device)
                parent = currNode.parentNode
                parent_tokens = parent.tokens['seqs'].to(self.device)
                t = torch.ones(1, device = self.device)
                dt = (1 - eps) / self.num_steps
                with torch.no_grad():
                    with self.timer.section("select.compute_log_policy"):
                        updated_log_policy_step = self.policy_model.compute_log_policy(parent_tokens,
                                                                                      child_tokens,
                                                                                      t=t, dt=dt)
                updated_log_rnd += updated_log_policy_step

                currNode.update_logrnd(updated_log_policy_step, updated_log_rnd) # update log_rnd

            if nodeStatus != 3:
                return currNode, nodeStatus

    def expand(self, parentNode, eps=1e-5):
        """
        Sample num_children unmasking steps from the policy, roll each child
        out to a fully unmasked sequence, score valid peptides, update the
        buffer, attach children to parentNode, and backpropagate rewards.
        """

        num_children = self.num_children
        # initialize child rewards that will be added to total rewards


        # compute number of rollout steps
        # if parentNode.timestep = self.num_steps then num_rollout_steps = 1
        num_rollout_steps = self.num_steps - parentNode.timestep
        # array of rollout timesteps from the timestep of parent node to 0
        rollout_t = torch.linspace(1, eps, self.num_steps + 1, device=self.device)
        dt = (1 - eps) / self.num_steps

        # initialize x and attn_mask
        x = parentNode.tokens['seqs'].to(self.device)
        attn_mask = parentNode.tokens['attention_mask'].to(self.device)
        parent_log_rnd = parentNode.log_rnd # stores the log_rnd up to parent node

        t = rollout_t[parentNode.timestep] * torch.ones(1, 1, device = self.device)

        # sample M child sequences and compute their log probabilities
        with torch.no_grad():
            with self.timer.section("expand.batch_mcts_reverse_step"):
                _, x_children, child_log_policy_step, child_log_pretrained_step = \
                    self.policy_model.batch_mcts_reverse_step(token_array=x,
                                                              t=t, dt=dt,
                                                              batch_size=num_children,
                                                              pretrained=self.pretrained)

        # log RN-weight of the first step: (num_children, 1)
        child_log_rnd = (parent_log_rnd + (child_log_pretrained_step - child_log_policy_step)).to(self.device)

        x_rollout = x_children

        traj_log_rnd = child_log_rnd # initialize log_rnd for entire rolled out trajectory

        # rollout under the policy and compute the log ratio at each step
        with self.timer.section("expand.rollout_total"):
            for i in range(1, num_rollout_steps):
                t = rollout_t[parentNode.timestep + i] * torch.ones(num_children, 1, device = self.device)

                with torch.no_grad():
                    _, x_next, log_policy_step, log_pretrained_step = \
                        self.policy_model.mcts_reverse_step(x_rollout,
                                                            t=t, dt=dt,
                                                            pretrained=self.pretrained)

                # add the rollout step
                traj_log_rnd += log_pretrained_step - log_policy_step

                x_rollout = x_next


        # if mask token remains, fully unmask
        mask_positions = (x_rollout == self.mask_index) # (B, L) bool

        # does **any** mask remain in any sequence
        any_mask_global = mask_positions.any().item() # true if mask remains
        if any_mask_global:
            with torch.no_grad():
                with self.timer.section("expand.noise_removal"):
                    log_p, x_next, log_policy_step, log_pretrained_step = \
                        self.policy_model.mcts_noise_removal(x_rollout,
                                                             t=t, dt=dt,
                                                             pretrained=self.pretrained)

            traj_log_rnd += log_pretrained_step - log_policy_step

            x_rollout = x_next

        # stores the string sequences for reward evaluation
        with self.timer.section("expand.decode"):
            childSequences = self.tokenizer.batch_decode(x_rollout)

        ## FOR PEPTIDES ONLY ##
        valid_x_children = []
        valid_x_final = []
        validSequences = []
        valid_traj_log_rnd = []

        with self.timer.section("expand.filter_is_peptide"):
            for i in range(num_children):
                # string sequence
                childSeq = childSequences[i]

                # check if the peptide is valid
                if self.analyzer.is_peptide(childSeq):
                    valid_x_children.append(x_children[i])
                    valid_x_final.append(x_rollout[i])
                    validSequences.append(childSeq)
                    valid_traj_log_rnd.append(traj_log_rnd[i])
                else:
                    # invalid peptide: still attach as a zero-reward child
                    childTokens = {'seqs': x_children[i].to(dtype=torch.long), 'attention_mask': attn_mask}
                    parentNode.addChildNode(tokens=childTokens,
                                            log_rnd=child_log_rnd[i],
                                            log_policy_step=child_log_policy_step[i],
                                            log_pretrained_step=child_log_pretrained_step[i],
                                            totalReward=np.zeros(self.num_obj))

        del traj_log_rnd

        # Per-objective logs, in the order emitted by the scoring functions.
        log_targets = [
            self.affinity1_log,
            self.sol_log,
            self.hemo_log,
            self.nf_log,
            self.permeability_log,
        ]

        if len(validSequences) != 0:
            # add scores to log
            with self.timer.section("expand.scoring_functions"):
                score_vectors = np.asarray(self.rewardFunc(input_seqs=validSequences))

            if score_vectors.ndim == 1:
                score_vectors = score_vectors[:, None]

            average_scores = score_vectors.T
            num_scores = average_scores.shape[0]
            score_len = average_scores.shape[1]

            for idx, log_list in enumerate(log_targets):
                if idx < num_scores:
                    log_list.append(average_scores[idx])
                else:
                    log_list.append(np.zeros(score_len, dtype=np.float32))
        else:
            # set the values added to log as 0s if there are no valid sequences
            empty = np.zeros(self.num_children, dtype=np.float32)
            for log_list in log_targets:
                log_list.append(empty)

        # convert to tensor
        if len(valid_x_final) == 0:
            # log and bail out gracefully for this expansion
            self.valid_fraction_log.append(0.0)
            return

        valid_x_final = torch.stack(valid_x_final, dim=0)
        valid_traj_log_rnd = torch.stack(valid_traj_log_rnd, dim=0)
        # update buffer and get rewards
        # NOTE(review): childSequences (all children) is passed here while
        # valid_x_final/score_vectors are indexed over the *valid* subset —
        # the stored "seq" may not match the stored tokens when some
        # children were invalid; presumably this should be validSequences.
        with self.timer.section("expand.update_buffer"):
            traj_log_rnds, scalar_rewards = self.updateBuffer(valid_x_final, valid_traj_log_rnd, score_vectors, childSequences)

        allChildReward = np.zeros_like(score_vectors[0])

        for i in range(len(score_vectors)):
            reward = score_vectors[i]

            # add to all child reward vector for backprop
            allChildReward += reward # (num_objectives,)

            # create node for sequence and add to the children node of parent
            childTokens = {'seqs': valid_x_children[i].to(dtype=torch.long), 'attention_mask': attn_mask}
            parentNode.addChildNode(tokens=childTokens,
                                    log_rnd=child_log_rnd[i],
                                    log_policy_step=child_log_policy_step[i],
                                    log_pretrained_step=child_log_pretrained_step[i],
                                    totalReward=reward)

        ### END OF FOR PEPTIDES ONLY ###

        valid_fraction = len(validSequences) / num_children
        self.valid_fraction_log.append(valid_fraction)

        # debugging
        print(f"[EXPAND] iter={self.iter_num} parent_t={parentNode.timestep} "
              f"num_children={num_children} valid={len(validSequences)} any_mask={any_mask_global}")
        if score_vectors is not None:
            print(f"[SCORES] min={np.min(score_vectors,0)} max={np.max(score_vectors,0)} "
                  f"nan_any={np.isnan(score_vectors).any()}")
        # end debugging

        self.reward_log.append(scalar_rewards)
        self.logrnd_log.append(traj_log_rnds.detach().cpu().numpy())

        allChildReward = allChildReward / len(validSequences) # normalize by number of valid children
        # backpropagate all child rewards
        with self.timer.section("expand.backprop"):
            self.backprop(parentNode, allChildReward)


    def backprop(self, node, allChildReward):
        """Propagate the averaged child reward up the path to the root."""
        while node:
            node.updateNode(allChildReward)
            node = node.parentNode
roformer.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import RoFormerConfig, RoFormerForMaskedLM
2
+ import torch.nn as nn
3
+ from torch.nn.parallel import DistributedDataParallel as DDP
4
+ import torch
5
+
6
class Roformer(nn.Module):
    """Thin wrapper around a HuggingFace RoFormerForMaskedLM.

    Builds the RoFormer configuration from the project config and the
    tokenizer's vocab size, and exposes freeze/unfreeze, forward,
    save and load helpers.
    """

    def __init__(self, config, tokenizer, device=None):
        super().__init__()

        self.tokenizer = tokenizer
        self.vocab_size = self.tokenizer.vocab_size

        # Default to CUDA when available unless a device is supplied.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else device

        roformer_config = RoFormerConfig(
            vocab_size=self.tokenizer.vocab_size,
            embedding_size=config.roformer.hidden_size,
            hidden_size=config.roformer.hidden_size,
            num_hidden_layers=config.roformer.n_layers,
            num_attention_heads=config.roformer.n_heads,
            intermediate_size=config.roformer.hidden_size * 4,
            max_position_embeddings=config.roformer.max_position_embeddings,
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            pad_token_id=0,
            rotary_value=False
        )

        self.model = RoFormerForMaskedLM(roformer_config).to(self.device)

    def freeze_model(self):
        """Disable gradients for every parameter of the underlying model."""
        for param in self.model.parameters():
            param.requires_grad = False

    def unfreeze_all_layers(self):
        """Re-enable gradients for every parameter of the underlying model."""
        for param in self.model.parameters():
            param.requires_grad = True

    def unfreeze_n_layers(self, n):
        """Unfreeze the query/key projections of the final `n` encoder layers.

        FIX: the layer count was hard-coded to 8, which silently unfroze the
        wrong layers whenever config.roformer.n_layers != 8; derive it from
        the actual encoder instead.
        """
        layers = self.model.roformer.encoder.layer
        num_layers = len(layers)

        for i, layer in enumerate(layers):
            # finetune only the final n layers
            if i >= num_layers - n:
                # unfreeze query weights
                for param in layer.attention.self.query.parameters():
                    param.requires_grad = True
                # unfreeze key weights
                for param in layer.attention.self.key.parameters():
                    param.requires_grad = True

    def forward(self, input_ids, attn_mask):
        """Run the masked-LM head and return raw logits.

        Args:
            input_ids: token id tensor, moved to self.device.
            attn_mask: attention mask tensor, moved to self.device.

        Returns:
            Tensor of logits with shape (batch, seq_len, vocab_size).
        """
        input_ids = input_ids.to(self.device)
        attn_mask = attn_mask.to(self.device)

        outputs = self.model(input_ids=input_ids, attention_mask=attn_mask)
        return outputs.logits

    def save_model(self, save_dir):
        """Save both model weights and tokenizer files to `save_dir`."""
        self.model.save_pretrained(save_dir)
        self.tokenizer.save_pretrained(save_dir)

    @classmethod
    def load_model(cls, save_dir, config, tokenizer):
        """Construct a Roformer and replace its weights from `save_dir`.

        NOTE: from_pretrained returns the model on CPU; callers relying on
        self.device should move it afterwards if needed.
        """
        roformer = cls(config, tokenizer)
        roformer.model = RoFormerForMaskedLM.from_pretrained(save_dir)
        return roformer
scoring/functions/binding.py ADDED
@@ -0,0 +1,482 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os, torch
3
+ import numpy as np
4
+ import torch
5
+ import pandas as pd
6
+ import torch.nn as nn
7
+ import esm
8
+ from transformers import AutoModelForMaskedLM
9
+
10
+
11
+ def _sanitize_token_ids(input_ids: torch.Tensor, vocab_size: int, unk_id: int) -> torch.Tensor:
12
+ if vocab_size <= 0 or input_ids.numel() == 0:
13
+ return input_ids
14
+ if torch.any(input_ids >= vocab_size) or torch.any(input_ids < 0):
15
+ # Replace out-of-range IDs with UNK to avoid embedding OOB.
16
+ unk = torch.tensor(unk_id, device=input_ids.device, dtype=input_ids.dtype)
17
+ input_ids = torch.where((input_ids >= vocab_size) | (input_ids < 0), unk, input_ids)
18
+ return input_ids
19
+
20
class ImprovedBindingPredictor(nn.Module):
    """Cross-attention model predicting protein–ligand binding affinity.

    Takes protein (ESM) embeddings and ligand (SMILES transformer)
    embeddings, runs alternating cross-attention layers, pools, and emits
    both a regression output (affinity) and 3-class binding-strength logits.
    """
    def __init__(self,
                 esm_dim=1280,
                 smiles_dim=768,
                 hidden_dim=512,
                 n_heads=8,
                 n_layers=3,
                 dropout=0.1):
        super().__init__()

        # Define binding thresholds (pKd/pKi/pIC50 units)
        self.tight_threshold = 7.5 # Kd/Ki/IC50 ≤ ~30nM
        self.weak_threshold = 6.0 # Kd/Ki/IC50 > 1μM

        # Project both modalities to the same hidden dimension
        self.smiles_projection = nn.Linear(smiles_dim, hidden_dim)
        self.protein_projection = nn.Linear(esm_dim, hidden_dim)
        self.protein_norm = nn.LayerNorm(hidden_dim)
        self.smiles_norm = nn.LayerNorm(hidden_dim)

        # Cross attention blocks with layer norm; note norm1/norm2 and the
        # FFN are shared between the protein and SMILES passes of each layer.
        self.cross_attention_layers = nn.ModuleList([
            nn.ModuleDict({
                'attention': nn.MultiheadAttention(hidden_dim, n_heads, dropout=dropout),
                'norm1': nn.LayerNorm(hidden_dim),
                'ffn': nn.Sequential(
                    nn.Linear(hidden_dim, hidden_dim * 4),
                    nn.ReLU(),
                    nn.Dropout(dropout),
                    nn.Linear(hidden_dim * 4, hidden_dim)
                ),
                'norm2': nn.LayerNorm(hidden_dim)
            }) for _ in range(n_layers)
        ])

        # Shared trunk over the concatenated pooled representations
        self.shared_head = nn.Sequential(
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
        )

        # Regression head: scalar affinity
        self.regression_head = nn.Linear(hidden_dim, 1)

        # Classification head (3 classes: tight, medium, weak binding)
        self.classification_head = nn.Linear(hidden_dim, 3)

    def get_binding_class(self, affinity):
        """Convert affinity values to class indices
        0: tight binding (>= 7.5)
        1: medium binding (6.0-7.5)
        2: weak binding (< 6.0)

        Accepts either a tensor (returns a long tensor of classes) or a
        scalar (returns an int).
        """
        if isinstance(affinity, torch.Tensor):
            tight_mask = affinity >= self.tight_threshold
            weak_mask = affinity < self.weak_threshold
            medium_mask = ~(tight_mask | weak_mask)

            # tight entries stay 0 (the zeros_like default)
            classes = torch.zeros_like(affinity, dtype=torch.long)
            classes[medium_mask] = 1
            classes[weak_mask] = 2
            return classes
        else:
            if affinity >= self.tight_threshold:
                return 0 # tight binding
            elif affinity < self.weak_threshold:
                return 2 # weak binding
            else:
                return 1 # medium binding

    def forward(self, protein_emb, smiles_emb):
        """Return (regression_output, classification_logits).

        NOTE(review): nn.MultiheadAttention defaults to (seq, batch, embed)
        inputs, and the transposes below are commented out; the dim=0 mean
        then pools over whichever axis comes first. Confirm the callers'
        embedding layout matches this (e.g. batch-less (seq, embed) inputs).
        """
        protein = self.protein_norm(self.protein_projection(protein_emb))
        smiles = self.smiles_norm(self.smiles_projection(smiles_emb))

        #protein = protein.transpose(0, 1)
        #smiles = smiles.transpose(0, 1)

        # Cross attention layers
        for layer in self.cross_attention_layers:
            # Protein attending to SMILES
            attended_protein = layer['attention'](
                protein, smiles, smiles
            )[0]
            protein = layer['norm1'](protein + attended_protein)
            protein = layer['norm2'](protein + layer['ffn'](protein))

            # SMILES attending to protein (uses the already-updated protein)
            attended_smiles = layer['attention'](
                smiles, protein, protein
            )[0]
            smiles = layer['norm1'](smiles + attended_smiles)
            smiles = layer['norm2'](smiles + layer['ffn'](smiles))

        # Get sequence-level representations by mean-pooling over dim 0
        protein_pool = torch.mean(protein, dim=0)
        smiles_pool = torch.mean(smiles, dim=0)

        # Concatenate both representations
        combined = torch.cat([protein_pool, smiles_pool], dim=-1)

        # Shared features
        shared_features = self.shared_head(combined)

        regression_output = self.regression_head(shared_features)
        classification_logits = self.classification_head(shared_features)

        return regression_output, classification_logits
+ return regression_output, classification_logits
128
+
129
class BindingAffinity:
    """Single-target binding-affinity scorer.

    Pre-computes the ESM-2 embedding of one fixed protein target at
    construction time, then scores peptide SMILES sequences against it with
    the ImprovedBindingPredictor checkpoint.
    """

    def __init__(self, prot_seq, tokenizer, base_path, device=None, emb_model=None):
        """
        Args:
            prot_seq: amino-acid sequence of the fixed protein target.
            tokenizer: peptide (SMILES) tokenizer compatible with PeptideCLM.
            base_path: root directory containing classifier checkpoints.
            device: torch device; auto-detected when None.
            emb_model: optional pre-loaded PeptideCLM roformer encoder.
        """
        super().__init__()
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else device

        # peptide embeddings
        if emb_model is not None:
            self.pep_model = emb_model.to(self.device).eval()
        else:
            self.pep_model = AutoModelForMaskedLM.from_pretrained('aaronfeller/PeptideCLM-23M-all').roformer.to(self.device).eval()

        self.pep_tokenizer = tokenizer
        # Fall back to a vocab lookup when the tokenizer does not expose
        # unk_token_id directly (0 as a last resort).
        self.unk_id = getattr(self.pep_tokenizer, "unk_token_id", None)
        if self.unk_id is None:
            self.unk_id = self.pep_tokenizer.vocab.get(self.pep_tokenizer.unk_token, 0)
        # Probe the encoder for vocab size / max length; the attribute layout
        # differs between the custom Roformer wrapper and a plain HF module.
        self.pep_vocab_size = None
        self.max_pep_len = None
        if hasattr(self.pep_model, "model") and hasattr(self.pep_model.model, "roformer"):
            self.pep_vocab_size = self.pep_model.model.roformer.embeddings.word_embeddings.num_embeddings
            self.max_pep_len = self.pep_model.model.roformer.config.max_position_embeddings
        elif hasattr(self.pep_model, "roformer"):
            self.pep_vocab_size = self.pep_model.roformer.embeddings.word_embeddings.num_embeddings
            self.max_pep_len = self.pep_model.roformer.config.max_position_embeddings
        elif hasattr(self.pep_model, "get_input_embeddings"):
            self.pep_vocab_size = self.pep_model.get_input_embeddings().num_embeddings
            self.max_pep_len = getattr(self.pep_model.config, "max_position_embeddings", None)

        self.model = ImprovedBindingPredictor().to(self.device)
        # NOTE(review): weights_only=False unpickles arbitrary objects;
        # acceptable only because the checkpoint ships with this repo.
        checkpoint = torch.load(f'{base_path}/tr2d2-pep/scoring/functions/classifiers/binding-affinity.pt',
                                map_location=self.device,
                                weights_only=False)
        self.model.load_state_dict(checkpoint['model_state_dict'])

        self.model.eval()

        self.esm_model, alphabet = esm.pretrained.esm2_t33_650M_UR50D()  # load ESM-2 model
        self.esm_model = self.esm_model.to(self.device).eval()
        self.prot_tokenizer = alphabet.get_batch_converter()  # load esm tokenizer

        data = [("target", prot_seq)]
        # get tokenized protein
        _, _, prot_tokens = self.prot_tokenizer(data)
        prot_tokens = prot_tokens.to(self.device)
        with torch.no_grad():
            results = self.esm_model.forward(prot_tokens, repr_layers=[33])  # final-layer representations
            prot_emb = results["representations"][33]

        # Mean-pool the per-residue embedding into a single (1, esm_dim) row,
        # cached for every subsequent scoring call.
        self.prot_emb = prot_emb[0].to(self.device)
        self.prot_emb = torch.mean(self.prot_emb, dim=0, keepdim=True)

    def forward(self, input_seqs):
        """Score each peptide SMILES in input_seqs against the fixed target.

        Returns:
            list[float]: one predicted affinity per input sequence.
        """
        with torch.no_grad():
            scores = []
            for seq in input_seqs:
                # Truncate only when the encoder advertises a max length.
                pep_tokens = self.pep_tokenizer(
                    seq,
                    return_tensors='pt',
                    padding=True,
                    truncation=self.max_pep_len is not None,
                    max_length=self.max_pep_len,
                )

                pep_tokens = {k: v.to(self.device) for k, v in pep_tokens.items()}
                # Clamp out-of-vocabulary ids to UNK before the embedding lookup.
                pep_tokens["input_ids"] = _sanitize_token_ids(
                    pep_tokens["input_ids"], int(self.pep_vocab_size or 0), int(self.unk_id)
                )

                with torch.no_grad():
                    # Check if using custom Roformer wrapper or standard model
                    if hasattr(self.pep_model, 'model'):
                        # Custom roformer.Roformer wrapper - get hidden states from inner model
                        emb = self.pep_model.model.roformer(
                            input_ids=pep_tokens['input_ids'],
                            attention_mask=pep_tokens.get('attention_mask'),
                            output_hidden_states=True
                        )
                        pep_emb = emb.last_hidden_state.squeeze(0)
                        pep_emb = torch.mean(pep_emb, dim=0, keepdim=True)
                    else:
                        # Standard AutoModelForMaskedLM
                        emb = self.pep_model(
                            input_ids=pep_tokens['input_ids'],
                            attention_mask=pep_tokens.get('attention_mask'),
                            output_hidden_states=True
                        )
                        pep_emb = emb.last_hidden_state.squeeze(0)
                        pep_emb = torch.mean(pep_emb, dim=0, keepdim=True)

                score, logits = self.model.forward(self.prot_emb, pep_emb)
                scores.append(score.item())
            return scores

    def __call__(self, input_seqs: list):
        """Alias for forward()."""
        return self.forward(input_seqs)
224
+
225
+
226
class MultiTargetBindingAffinity:
    """
    Binding affinity predictor that can handle multiple protein targets dynamically.

    Unlike BindingAffinity which pre-computes a single target's embedding,
    this class can switch between different protein targets on-the-fly.
    Per-target ESM-2 embeddings are memoized in self.prot_emb_cache.
    """

    def __init__(self, tokenizer, base_path, device=None, emb_model=None):
        """
        Initialize multi-target binding affinity predictor.

        Args:
            tokenizer: Peptide tokenizer
            base_path: Base path for model files
            device: Device for computation (default: auto-detect)
            emb_model: Optional pre-loaded embedding model
        """
        super().__init__()
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else device

        # Peptide embeddings
        if emb_model is not None:
            self.pep_model = emb_model.to(self.device).eval()
        else:
            self.pep_model = AutoModelForMaskedLM.from_pretrained('aaronfeller/PeptideCLM-23M-all').roformer.to(self.device).eval()

        self.pep_tokenizer = tokenizer
        # Fall back to a vocab lookup when unk_token_id is not exposed.
        self.unk_id = getattr(self.pep_tokenizer, "unk_token_id", None)
        if self.unk_id is None:
            self.unk_id = self.pep_tokenizer.vocab.get(self.pep_tokenizer.unk_token, 0)
        # Probe the encoder for vocab size / max length; layout differs
        # between the custom Roformer wrapper and a plain HF module.
        self.pep_vocab_size = None
        self.max_pep_len = None
        if hasattr(self.pep_model, "model") and hasattr(self.pep_model.model, "roformer"):
            self.pep_vocab_size = self.pep_model.model.roformer.embeddings.word_embeddings.num_embeddings
            self.max_pep_len = self.pep_model.model.roformer.config.max_position_embeddings
        elif hasattr(self.pep_model, "roformer"):
            self.pep_vocab_size = self.pep_model.roformer.embeddings.word_embeddings.num_embeddings
            self.max_pep_len = self.pep_model.roformer.config.max_position_embeddings
        elif hasattr(self.pep_model, "get_input_embeddings"):
            self.pep_vocab_size = self.pep_model.get_input_embeddings().num_embeddings
            self.max_pep_len = getattr(self.pep_model.config, "max_position_embeddings", None)

        # Binding affinity prediction model
        self.model = ImprovedBindingPredictor().to(self.device)
        # NOTE(review): weights_only=False unpickles arbitrary objects;
        # acceptable only for the checkpoint shipped with this repo.
        checkpoint = torch.load(f'{base_path}/tr2d2-pep/scoring/functions/classifiers/binding-affinity.pt',
                                map_location=self.device,
                                weights_only=False)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.model.eval()

        # Protein (ESM) model
        self.esm_model, alphabet = esm.pretrained.esm2_t33_650M_UR50D()
        self.esm_model = self.esm_model.to(self.device).eval()
        self.prot_tokenizer = alphabet.get_batch_converter()

        # Cache for protein embeddings (target_seq -> embedding)
        self.prot_emb_cache = {}

    def get_protein_embedding(self, prot_seq: str):
        """
        Get protein embedding, using cache if available.

        Args:
            prot_seq: Protein amino acid sequence

        Returns:
            Mean-pooled (1, esm_dim) protein embedding tensor
        """
        # Check cache first
        if prot_seq in self.prot_emb_cache:
            return self.prot_emb_cache[prot_seq]

        # Compute embedding
        data = [("target", prot_seq)]
        _, _, prot_tokens = self.prot_tokenizer(data)
        prot_tokens = prot_tokens.to(self.device)

        with torch.no_grad():
            results = self.esm_model.forward(prot_tokens, repr_layers=[33])
            prot_emb = results["representations"][33]

        # Mean-pool per-residue embeddings into a single row vector.
        prot_emb = prot_emb[0].to(self.device)
        prot_emb = torch.mean(prot_emb, dim=0, keepdim=True)

        # Cache for future use
        self.prot_emb_cache[prot_seq] = prot_emb

        return prot_emb

    def forward(self, input_seqs, prot_seq: str):
        """
        Predict binding affinity for peptide-protein pairs.

        Args:
            input_seqs: List of peptide sequences
            prot_seq: Protein target sequence

        Returns:
            List of binding affinity scores
        """
        # Get protein embedding (cached if previously computed)
        prot_emb = self.get_protein_embedding(prot_seq)

        with torch.no_grad():
            scores = []
            for seq in input_seqs:
                # Truncate only when the encoder advertises a max length.
                pep_tokens = self.pep_tokenizer(
                    seq,
                    return_tensors='pt',
                    padding=True,
                    truncation=self.max_pep_len is not None,
                    max_length=self.max_pep_len,
                )
                pep_tokens = {k: v.to(self.device) for k, v in pep_tokens.items()}
                # Clamp out-of-vocabulary ids to UNK before the embedding lookup.
                pep_tokens["input_ids"] = _sanitize_token_ids(
                    pep_tokens["input_ids"], int(self.pep_vocab_size or 0), int(self.unk_id)
                )

                with torch.no_grad():
                    # Check if using custom Roformer wrapper or standard model
                    if hasattr(self.pep_model, 'model'):
                        # Custom roformer.Roformer wrapper - get hidden states from inner model
                        emb = self.pep_model.model.roformer(
                            input_ids=pep_tokens['input_ids'],
                            attention_mask=pep_tokens.get('attention_mask'),
                            output_hidden_states=True
                        )
                        pep_emb = emb.last_hidden_state.squeeze(0)
                        pep_emb = torch.mean(pep_emb, dim=0, keepdim=True)
                    else:
                        # Standard AutoModelForMaskedLM
                        emb = self.pep_model(
                            input_ids=pep_tokens['input_ids'],
                            attention_mask=pep_tokens.get('attention_mask'),
                            output_hidden_states=True
                        )
                        pep_emb = emb.last_hidden_state.squeeze(0)
                        pep_emb = torch.mean(pep_emb, dim=0, keepdim=True)

                score, logits = self.model.forward(prot_emb, pep_emb)
                scores.append(score.item())

            return scores

    def forward_from_probs(
        self,
        token_probs: torch.Tensor,
        attention_mask: torch.Tensor,
        prot_seq: str,
    ) -> torch.Tensor:
        """
        Differentiable binding affinity from token probabilities.

        Instead of discrete ids, consumes a (batch, seq, vocab) probability
        tensor and forms soft input embeddings as probs @ embedding_matrix,
        so gradients flow back into the token distribution.
        """
        # Accept an unbatched (seq, vocab) tensor for convenience.
        if token_probs.dim() == 2:
            token_probs = token_probs.unsqueeze(0)
        token_probs = token_probs.to(self.device)
        attention_mask = attention_mask.to(self.device)

        # Locate the word-embedding matrix for whichever wrapper is in use.
        roformer = None
        if hasattr(self.pep_model, "model") and hasattr(self.pep_model.model, "roformer"):
            roformer = self.pep_model.model.roformer
            emb_weight = roformer.embeddings.word_embeddings.weight
        elif hasattr(self.pep_model, "roformer"):
            roformer = self.pep_model.roformer
            emb_weight = roformer.embeddings.word_embeddings.weight
        else:
            emb_weight = self.pep_model.get_input_embeddings().weight

        if token_probs.size(-1) != emb_weight.size(0):
            raise ValueError(
                f"Token vocab mismatch: probs={token_probs.size(-1)} vs model={emb_weight.size(0)}"
            )

        # Soft embeddings: expected embedding under the token distribution.
        inputs_embeds = token_probs @ emb_weight
        if roformer is not None:
            outputs = roformer(inputs_embeds=inputs_embeds, attention_mask=attention_mask)
            hidden = outputs.last_hidden_state
        else:
            outputs = self.pep_model(
                inputs_embeds=inputs_embeds,
                attention_mask=attention_mask,
                output_hidden_states=True,
                return_dict=True,
            )
            hidden = outputs.hidden_states[-1]

        # Masked mean pooling over the sequence dimension (padding excluded).
        mask = attention_mask.to(hidden.dtype).unsqueeze(-1)
        pep_emb = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp_min(1.0)

        # Broadcast the cached target embedding across the peptide batch.
        prot_emb = self.get_protein_embedding(prot_seq).to(self.device)
        prot_emb = prot_emb.expand(pep_emb.size(0), -1).unsqueeze(0)
        pep_emb = pep_emb.unsqueeze(0)

        score, _ = self.model.forward(prot_emb, pep_emb)
        return score.squeeze(-1)

    def __call__(self, input_seqs: list, prot_seq: str):
        """
        Predict binding affinity for peptide-protein pairs.

        Args:
            input_seqs: List of peptide sequences
            prot_seq: Protein target sequence

        Returns:
            List of binding affinity scores
        """
        return self.forward(input_seqs, prot_seq)

    def clear_cache(self):
        """Clear the protein embedding cache to free memory."""
        self.prot_emb_cache = {}
439
+
440
+
441
class TargetSpecificBindingAffinity:
    """
    Adapter that fixes one protein target onto a MultiTargetBindingAffinity.

    Exposes the single-target BindingAffinity interface (peptides in, scores
    out) while delegating all computation to the shared multi-target
    predictor, so callers never need to pass the protein sequence.
    """

    def __init__(self, multi_target_predictor: MultiTargetBindingAffinity, prot_seq: str):
        """
        Args:
            multi_target_predictor: shared predictor doing the actual scoring.
            prot_seq: protein target sequence this wrapper is bound to.
        """
        self.predictor = multi_target_predictor
        self.prot_seq = prot_seq

    def forward(self, input_seqs):
        """Score the peptides in `input_seqs` against the bound target."""
        return self.predictor.forward(input_seqs, self.prot_seq)

    def __call__(self, input_seqs: list):
        """Alias for forward(); mirrors the BindingAffinity call signature."""
        return self.forward(input_seqs)
scoring/functions/classifiers/hemolysis-xgboost.json ADDED
The diff for this file is too large to render. See raw diff
 
scoring/functions/classifiers/nonfouling-xgboost.json ADDED
The diff for this file is too large to render. See raw diff
 
scoring/functions/classifiers/permeability-xgboost.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e5d8c84bdad75f7091b5b3963133d4b0ebd180ae45654618ca6c090eee0bc06
3
+ size 45249160
scoring/functions/classifiers/solubility-xgboost.json ADDED
The diff for this file is too large to render. See raw diff
 
scoring/functions/hemolysis.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import xgboost as xgb
2
+ import torch
3
+ import numpy as np
4
+ from transformers import AutoModelForMaskedLM
5
+ import warnings
6
+ import numpy as np
7
+ from rdkit import rdBase
8
+
9
+ rdBase.DisableLog('rdApp.error')
10
+ warnings.filterwarnings("ignore", category=DeprecationWarning)
11
+ warnings.filterwarnings("ignore", category=UserWarning)
12
+ warnings.filterwarnings("ignore", category=FutureWarning)
13
+
14
class Hemolysis:
    """XGBoost hemolysis scorer over PeptideCLM sequence embeddings.

    Scores are 1 - P(hemolytic), i.e. higher means safer.
    """

    def __init__(self, tokenizer, base_path, device=None, emb_model=None):
        """
        Args:
            tokenizer: peptide (SMILES) tokenizer compatible with the encoder.
            base_path: root directory containing the classifier checkpoint.
            device: torch device; auto-detected when None.
            emb_model: optional pre-loaded PeptideCLM roformer encoder.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else device
        self.predictor = xgb.Booster(model_file=f'{base_path}/tr2d2-pep/scoring/functions/classifiers/hemolysis-xgboost.json')
        # Bug fix: move the encoder to the *resolved* self.device. The original
        # called .to(device) with the raw argument (possibly None), leaving the
        # model on CPU while generate_embeddings sends tokens to self.device.
        if emb_model is not None:
            self.emb_model = emb_model.to(self.device).eval()
        else:
            self.emb_model = AutoModelForMaskedLM.from_pretrained('aaronfeller/PeptideCLM-23M-all').roformer.to(self.device).eval()
        self.tokenizer = tokenizer

    def generate_embeddings(self, sequences):
        """Return mean-pooled last-hidden-state embeddings, shape (n, hidden)."""
        embeddings = []
        for sequence in sequences:
            tokenized = self.tokenizer(sequence, return_tensors='pt')
            tokenized = {k: v.to(self.device) for k, v in tokenized.items()}
            with torch.no_grad():
                output = self.emb_model(**tokenized)
                # Mean pooling across sequence length
                embedding = output.last_hidden_state.mean(dim=1).squeeze(0).cpu().numpy()
            embeddings.append(embedding)
        return np.array(embeddings)

    def get_scores(self, input_seqs: list):
        """Return 1 - P(hemolytic) per sequence as an np.ndarray of shape (n,)."""
        scores = np.ones(len(input_seqs))
        features = self.generate_embeddings(input_seqs)

        if len(features) == 0:
            return scores

        # Sanitize NaNs / infs before handing features to XGBoost.
        features = np.nan_to_num(features, nan=0.)
        features = np.clip(features, np.finfo(np.float32).min, np.finfo(np.float32).max)
        features = xgb.DMatrix(features)

        probs = self.predictor.predict(features)
        # Return the probability of it being NOT hemolytic.
        return scores - probs

    def __call__(self, input_seqs: list):
        return self.get_scores(input_seqs)
53
+
54
def unittest():
    """Smoke test for the Hemolysis scorer.

    NOTE(review): Hemolysis() is invoked without the required
    tokenizer/base_path arguments, so this raises TypeError as written —
    confirm the intended invocation before running.
    """
    hemo = Hemolysis()
    # One cyclic peptide expressed as SMILES.
    seq = ["[te]NCC(=O)N[C@H](CS)C(=O)N[C@@H](CO)C(=O)NCC(=O)N[C@@H](CC1=CN=C-N1)C(=O)N[C@@H](CC(=O)N)C(=O)N[C@@H](CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@H](c1ccc(cc1)F)C(=O)N[C@@H]([C@H](CC)C)C(=O)N[C@@H](CCCO)C(=O)N[C@@H](CC1=CN=C-N1)C(=O)N[C@@H](CCC(=O)O)C(=O)N[C@@H](CO)C(=O)O"]
    print(hemo.tokenizer.vocab_size)
    scores = hemo(input_seqs=seq)
    print(scores)


if __name__ == '__main__':
    unittest()
scoring/functions/nonfouling.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+ import xgboost as xgb
4
+ import torch
5
+ import numpy as np
6
+ from transformers import AutoModelForMaskedLM
7
+ import warnings
8
+ import numpy as np
9
+ from rdkit import Chem, rdBase, DataStructs
10
+
11
+
12
+ rdBase.DisableLog('rdApp.error')
13
+ warnings.filterwarnings("ignore", category=DeprecationWarning)
14
+ warnings.filterwarnings("ignore", category=UserWarning)
15
+ warnings.filterwarnings("ignore", category=FutureWarning)
16
+
17
class Nonfouling:
    """XGBoost nonfouling scorer over PeptideCLM sequence embeddings.

    Scores are P(nonfouling), i.e. higher is better.
    """

    def __init__(self, tokenizer, base_path, device=None, emb_model=None):
        """
        Args:
            tokenizer: peptide (SMILES) tokenizer compatible with the encoder.
            base_path: root directory containing the classifier checkpoint.
            device: torch device; auto-detected when None.
            emb_model: optional pre-loaded PeptideCLM roformer encoder.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else device
        self.predictor = xgb.Booster(model_file=f'{base_path}/tr2d2-pep/scoring/functions/classifiers/nonfouling-xgboost.json')
        # Bug fix: use the resolved self.device — the raw `device` argument may
        # be None, which would leave the encoder on CPU while inputs go to GPU.
        if emb_model is not None:
            self.emb_model = emb_model.to(self.device).eval()
        else:
            self.emb_model = AutoModelForMaskedLM.from_pretrained('aaronfeller/PeptideCLM-23M-all').roformer.to(self.device).eval()
        self.tokenizer = tokenizer

    def generate_embeddings(self, sequences):
        """Return mean-pooled last-hidden-state embeddings, shape (n, hidden)."""
        embeddings = []
        for sequence in sequences:
            tokenized = self.tokenizer(sequence, return_tensors='pt')
            tokenized = {k: v.to(self.device) for k, v in tokenized.items()}
            with torch.no_grad():
                output = self.emb_model(**tokenized)
                # Mean pooling across sequence length
                embedding = output.last_hidden_state.mean(dim=1).squeeze(0).cpu().numpy()
            embeddings.append(embedding)
        return np.array(embeddings)

    def get_scores(self, input_seqs: list):
        """Return P(nonfouling) per sequence as an np.ndarray of shape (n,)."""
        scores = np.zeros(len(input_seqs))
        features = self.generate_embeddings(input_seqs)

        if len(features) == 0:
            return scores

        # Sanitize NaNs / infs before handing features to XGBoost.
        features = np.nan_to_num(features, nan=0.)
        features = np.clip(features, np.finfo(np.float32).min, np.finfo(np.float32).max)
        features = xgb.DMatrix(features)

        # Probability of the peptide being nonfouling (comment fixed: it
        # previously said "not hemolytic", copied from the hemolysis scorer).
        scores = self.predictor.predict(features)
        return scores

    def __call__(self, input_seqs: list):
        return self.get_scores(input_seqs)
56
+
57
def unittest():
    """Smoke test for the Nonfouling scorer.

    NOTE(review): Nonfouling() is invoked without the required
    tokenizer/base_path arguments, so this raises TypeError as written —
    confirm the intended invocation before running.
    """
    nf = Nonfouling()
    seq = ["NCC(=O)N[C@H](CS)C(=O)N[C@@H](CO)C(=O)NCC(=O)N[C@@H](CC1=CN=C-N1)C(=O)N[C@@H](CC(=O)N)C(=O)N[C@@H](CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@H](c1ccc(cc1)F)C(=O)N[C@@H]([C@H](CC)C)C(=O)N[C@@H](CCCO)C(=O)N[C@@H](CC1=CN=C-N1)C(=O)N[C@@H](CCC(=O)O)C(=O)N[C@@H](CO)C(=O)O"]

    scores = nf(input_seqs=seq)
    print(scores)


if __name__ == '__main__':
    unittest()
scoring/functions/permeability.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+ import xgboost as xgb
4
+ import torch
5
+ import numpy as np
6
+ from transformers import AutoModelForMaskedLM
7
+ import warnings
8
+ import numpy as np
9
+ from rdkit.Chem import Descriptors, rdMolDescriptors
10
+ from rdkit import Chem, rdBase, DataStructs
11
+ from rdkit.Chem import AllChem
12
+ from typing import List
13
+ from transformers import AutoModelForMaskedLM
14
+
15
+
16
+ rdBase.DisableLog('rdApp.error')
17
+ warnings.filterwarnings("ignore", category=DeprecationWarning)
18
+ warnings.filterwarnings("ignore", category=UserWarning)
19
+ warnings.filterwarnings("ignore", category=FutureWarning)
20
+
21
def fingerprints_from_smiles(smiles: List, size=2048):
    """Create ECFP fingerprints for a list of SMILES strings, with validity check.

    Invalid SMILES produce an all-zero fingerprint row.

    Args:
        smiles: list of SMILES strings.
        size: number of bits per fingerprint.

    Returns:
        (fps, valid_mask): fps is an (n, size) array; valid_mask is a list of
        0/1 flags marking which inputs parsed successfully.
    """
    fps = []
    valid_mask = []
    for smile in smiles:
        mol = Chem.MolFromSmiles(smile)
        valid_mask.append(int(mol is not None))
        fp = fingerprints_from_mol(mol, size=size) if mol else np.zeros((1, size))
        fps.append(fp)

    if not fps:
        # Bug fix: np.concatenate raises on an empty list; return an
        # empty (0, size) array instead.
        return np.zeros((0, size)), valid_mask
    fps = np.concatenate(fps, axis=0)
    return fps, valid_mask
33
+
34
+
35
def fingerprints_from_mol(molecule, radius=3, size=2048, hashed=False):
    """Create an ECFP (Morgan) fingerprint of a molecule.

    Args:
        molecule: RDKit Mol object (must not be None).
        radius: Morgan radius (3 corresponds to ECFP6).
        size: number of bits in the fingerprint.
        hashed: use the hashed count fingerprint instead of the bit vector.

    Returns:
        np.ndarray of shape (1, size).
    """
    if hashed:
        fp_bits = AllChem.GetHashedMorganFingerprint(molecule, radius, nBits=size)
    else:
        fp_bits = AllChem.GetMorganFingerprintAsBitVect(molecule, radius, nBits=size)
    # ConvertToNumpyArray resizes/fills fp_np in place from the RDKit vector.
    fp_np = np.zeros((1,))
    DataStructs.ConvertToNumpyArray(fp_bits, fp_np)
    return fp_np.reshape(1, -1)
44
+
45
def getMolDescriptors(mol, missingVal=0):
    """Calculate the full RDKit descriptor list (plus Lipinski counts) for a molecule.

    Args:
        mol: RDKit molecule. May be None — every descriptor then falls back
            to missingVal via the except branch.
        missingVal: value recorded when a descriptor computation fails.

    Returns:
        (values, names): parallel lists of descriptor values and names.
    """
    values, names = [], []
    for nm, fn in Descriptors._descList:
        try:
            val = fn(mol)
        except Exception:
            # Bug fix: narrowed from a bare `except:` which also swallowed
            # KeyboardInterrupt/SystemExit. Individual descriptors can fail
            # on odd molecules; record the sentinel and continue.
            val = missingVal
        values.append(val)
        names.append(nm)

    # Extra peptide-relevant counts not included in Descriptors._descList.
    custom_descriptors = {'hydrogen-bond donors': rdMolDescriptors.CalcNumLipinskiHBD,
                          'hydrogen-bond acceptors': rdMolDescriptors.CalcNumLipinskiHBA,
                          'rotatable bonds': rdMolDescriptors.CalcNumRotatableBonds,}

    for nm, fn in custom_descriptors.items():
        try:
            val = fn(mol)
        except Exception:
            val = missingVal
        values.append(val)
        names.append(nm)
    return values, names
69
+
70
def get_pep_dps_from_smi(smi):
    """Compute the RDKit descriptor vector for a single SMILES string.

    NOTE(review): Chem.MolFromSmiles returns None for invalid SMILES rather
    than raising, so the except branch rarely fires; a None mol then makes
    every descriptor fall back to getMolDescriptors' missingVal.
    """
    try:
        mol = Chem.MolFromSmiles(smi)
    except:
        print(f"convert smi {smi} to molecule failed!")
        mol = None

    dps, _ = getMolDescriptors(mol)
    return np.array(dps)
79
+
80
+
81
def get_pep_dps(smi_list):
    """Stack per-SMILES descriptor vectors into an (n, 213) matrix."""
    if not smi_list:
        # Keep the column count stable even for empty input.
        return np.zeros((0, 213))
    rows = [get_pep_dps_from_smi(smi) for smi in smi_list]
    return np.array(rows)
85
+
86
def check_smi_validity(smiles: list):
    """Split SMILES into RDKit-parseable ones, returned with their indices.

    Returns:
        (valid_smi, valid_idx): the parseable SMILES strings and the indices
        they occupied in the input list.
    """
    valid_smi = []
    valid_idx = []
    for idx, smi in enumerate(smiles):
        try:
            # Empty / falsy entries are treated as invalid without parsing.
            mol = Chem.MolFromSmiles(smi) if smi else None
        except Exception:
            # Unparseable input: treat as invalid and move on.
            continue
        if mol:
            valid_smi.append(smi)
            valid_idx.append(idx)
    return valid_smi, valid_idx
98
+
99
class Permeability:
    """XGBoost membrane-permeability scorer over PeptideCLM embeddings.

    Optionally augments the embeddings with ECFP fingerprints and RDKit
    descriptors (both disabled by default in get_features).
    """

    def __init__(self, tokenizer, base_path, device=None, emb_model=None):
        """
        Args:
            tokenizer: peptide (SMILES) tokenizer compatible with the encoder.
            base_path: root directory containing the classifier checkpoint.
            device: torch device; auto-detected when None.
            emb_model: optional pre-loaded PeptideCLM roformer encoder.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else device
        self.predictor = xgb.Booster(model_file=f'{base_path}/tr2d2-pep/scoring/functions/classifiers/permeability-xgboost.json')
        if emb_model is not None:
            self.emb_model = emb_model.to(self.device).eval()
        else:
            # Bug fix: move to the resolved self.device — the raw `device`
            # argument may be None, which left the model on CPU while
            # generate_embeddings sends tokens to self.device.
            self.emb_model = AutoModelForMaskedLM.from_pretrained('aaronfeller/PeptideCLM-23M-all').roformer.to(self.device).eval()

        self.tokenizer = tokenizer

    def generate_embeddings(self, sequences):
        """Return mean-pooled last-hidden-state embeddings, shape (n, hidden)."""
        embeddings = []
        for sequence in sequences:
            tokenized = self.tokenizer(sequence, return_tensors='pt')
            tokenized = {k: v.to(self.device) for k, v in tokenized.items()}
            with torch.no_grad():
                output = self.emb_model(**tokenized)
                # Mean pooling across sequence length
                embedding = output.last_hidden_state.mean(dim=1).squeeze(0).cpu().numpy()
            embeddings.append(embedding)
        return np.array(embeddings)

    def get_features(self, input_seqs: list, dps=False, fps=False):
        """Assemble the feature matrix: [fingerprints | descriptors | embeddings].

        Args:
            input_seqs: peptide SMILES strings.
            dps: include RDKit descriptor columns.
            fps: include ECFP fingerprint columns.
        """
        if fps:
            fingerprints = fingerprints_from_smiles(input_seqs)[0]
        else:
            # Bug fix: use numpy (not torch) placeholders so np.concatenate
            # below operates on a homogeneous list of arrays.
            fingerprints = np.empty((len(input_seqs), 0))

        if dps:
            descriptors = get_pep_dps(input_seqs)
        else:
            descriptors = np.empty((len(input_seqs), 0))

        embeddings = self.generate_embeddings(input_seqs)
        features = np.concatenate([fingerprints, descriptors, embeddings], axis=1)
        return features

    def get_scores(self, input_seqs: list):
        """Predicted permeability per sequence; -10 sentinel rows for empty input."""
        scores = -10 * np.ones(len(input_seqs))
        # Bug fix: with no sequences the embedding array is 1-D and
        # np.concatenate in get_features raised; return the sentinel early.
        if not input_seqs:
            return scores

        features = self.get_features(input_seqs)

        if len(features) == 0:
            return scores

        # Sanitize NaNs / infs before handing features to XGBoost.
        features = np.nan_to_num(features, nan=0.)
        features = np.clip(features, np.finfo(np.float32).min, np.finfo(np.float32).max)
        features = xgb.DMatrix(features)

        scores = self.predictor.predict(features)
        return scores

    def __call__(self, input_seqs: list):
        return self.get_scores(input_seqs)
162
+
163
def unittest():
    """Smoke test for the Permeability scorer.

    NOTE(review): Permeability() is invoked without the required
    tokenizer/base_path arguments, so this raises TypeError as written —
    confirm the intended invocation before running.
    """
    permeability = Permeability()
    seq = ['N[C@@H](CCCNC(=N)N)C(=O)N[C@@H](Cc1cNc2c1cc(O)cc2)C(=O)N[C@@H](CC1=CN=C-N1)C(=O)N[C@@H](CCCNC(=N)N)C(=O)N[C@@H](Cc1ccccc1)C(=O)N[C@@H](CCC(=O)O)C(=O)N[C@@H]([C@@H](O)C(C)C)C(=O)N[C@@H](Cc1ccc(O)cc1)C(=O)N[C@H](CC(=CN2)C1=C2C=CC=C1)C(=O)O']
    scores = permeability(input_seqs=seq)
    print(scores)


if __name__ == '__main__':
    unittest()
scoring/functions/solubility.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import xgboost as xgb
2
+ import torch
3
+ import numpy as np
4
+ from transformers import AutoModelForMaskedLM
5
+ import warnings
6
+ import numpy as np
7
+ from rdkit import rdBase
8
+
9
+ rdBase.DisableLog('rdApp.error')
10
+ warnings.filterwarnings("ignore", category=DeprecationWarning)
11
+ warnings.filterwarnings("ignore", category=UserWarning)
12
+ warnings.filterwarnings("ignore", category=FutureWarning)
13
+
14
class Solubility:
    """XGBoost solubility scorer driven by PeptideCLM sequence embeddings."""

    def __init__(self, tokenizer, base_path, device=None, emb_model=None):
        """
        Args:
            tokenizer: peptide (SMILES) tokenizer compatible with the encoder.
            base_path: root directory containing the classifier checkpoint.
            device: torch device; auto-detected when None.
            emb_model: optional pre-loaded PeptideCLM roformer encoder.
        """
        if device is None:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.device = device

        model_file = f'{base_path}/tr2d2-pep/scoring/functions/classifiers/solubility-xgboost.json'
        self.predictor = xgb.Booster(model_file=model_file)

        if emb_model is None:
            emb_model = AutoModelForMaskedLM.from_pretrained('aaronfeller/PeptideCLM-23M-all').roformer
        self.emb_model = emb_model.to(self.device).eval()

        self.tokenizer = tokenizer

    def generate_embeddings(self, sequences):
        """Return one mean-pooled encoder embedding per sequence, shape (n, hidden)."""
        pooled = []
        for seq in sequences:
            batch = self.tokenizer(seq, return_tensors='pt')
            batch = {name: tensor.to(self.device) for name, tensor in batch.items()}
            with torch.no_grad():
                hidden = self.emb_model(**batch).last_hidden_state
                # Average over the token dimension to get a single vector.
                pooled.append(hidden.mean(dim=1).squeeze(0).cpu().numpy())
        return np.array(pooled)

    def get_scores(self, input_seqs: list):
        """Predict a solubility score for each input sequence."""
        feats = self.generate_embeddings(input_seqs)
        if len(feats) == 0:
            # No embeddings: fall back to all-zero scores.
            return np.zeros(len(input_seqs))

        # Sanitize NaNs / infs before handing features to XGBoost.
        feats = np.nan_to_num(feats, nan=0.)
        feats = np.clip(feats, np.finfo(np.float32).min, np.finfo(np.float32).max)
        return self.predictor.predict(xgb.DMatrix(feats))

    def __call__(self, input_seqs: list):
        return self.get_scores(input_seqs)
55
+
56
def unittest():
    """Smoke test for the Solubility scorer.

    NOTE(review): Solubility() is invoked without the required
    tokenizer/base_path arguments, so this raises TypeError as written —
    confirm the intended invocation before running.
    """
    solubility = Solubility()
    seq = ["NCC(=O)N[C@H](CS)C(=O)N[C@@H](CO)C(=O)NCC(=O)N[C@@H](CC1=CN=C-N1)C(=O)N[C@@H](CC(=O)N)C(=O)N[C@@H](CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@H](c1ccc(cc1)F)C(=O)N[C@@H]([C@H](CC)C)C(=O)N[C@@H](CCCO)C(=O)N[C@@H](CC1=CN=C-N1)C(=O)N[C@@H](CCC(=O)O)C(=O)N[C@@H](CO)C(=O)O"]
    scores = solubility(input_seqs=seq)
    print(scores)


if __name__ == '__main__':
    unittest()
scoring/scoring_functions.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+ from tokenizer.my_tokenizers import SMILES_SPE_Tokenizer
4
+ from transformers import AutoModelForMaskedLM
5
+ from scoring.functions.binding import BindingAffinity
6
+ from scoring.functions.permeability import Permeability
7
+ from scoring.functions.solubility import Solubility
8
+ from scoring.functions.hemolysis import Hemolysis
9
+ from scoring.functions.nonfouling import Nonfouling
10
+
11
+ base_path = 'To Be Added'
12
+
13
def resolve_device(requested):
    """Resolve a user-supplied device spec to a usable torch.device.

    Falls back to CPU whenever the request is unparseable or refers to an
    unavailable CUDA device; out-of-range CUDA indices are clamped to cuda:0.

    Args:
        requested: None, "auto", a device string, or a torch.device.

    Returns:
        torch.device that is safe to use on this machine.
    """
    # None / "auto": pick the first GPU when available, else CPU.
    if requested is None or str(requested).lower() == "auto":
        if torch.cuda.is_available() and torch.cuda.device_count() > 0:
            return torch.device("cuda:0")
        return torch.device("cpu")

    try:
        device = torch.device(requested)
    except Exception:
        # Unparseable spec: degrade gracefully instead of crashing.
        return torch.device("cpu")

    if device.type != "cuda":
        return device

    if not torch.cuda.is_available() or torch.cuda.device_count() == 0:
        return torch.device("cpu")

    # Bug fix: `index` can never be None after the fallback below, so the
    # redundant `index is None` test was removed.
    index = device.index if device.index is not None else 0
    if index < 0 or index >= torch.cuda.device_count():
        return torch.device("cuda:0")

    return torch.device(f"cuda:{index}")
35
+
36
class ScoringFunctions:
    """Bundle of peptide scoring functions evaluated on generated sequences."""

    def __init__(self, score_func_names=None, prot_seqs=None, device=None):
        """
        Class for generating score vectors given generated sequences.

        Args:
            score_func_names: list of scoring-function names to evaluate
                (keys of self.all_funcs); None means no scoring, i.e. only
                unmasking based on validity of peptide bonds.
            prot_seqs: list of target protein sequences — one per requested
                binding_affinity function.
            device: requested compute device, resolved via resolve_device.
        """
        device = resolve_device(device)
        # Shared PeptideCLM encoder reused by every property scorer.
        emb_model = AutoModelForMaskedLM.from_pretrained(
            'aaronfeller/PeptideCLM-23M-all'
        ).roformer.to(device).eval()
        tokenizer = SMILES_SPE_Tokenizer(f'{base_path}/tr2d2-pep/tokenizer/new_vocab.txt',
                                         f'{base_path}/tr2d2-pep/tokenizer/new_splits.txt')
        prot_seqs = prot_seqs if prot_seqs is not None else []

        if score_func_names is None:
            # Just do unmasking based on validity of peptide bonds.
            self.score_func_names = []
        else:
            self.score_func_names = score_func_names

        # Binding affinities
        self.target_protein = prot_seqs

        # Bug fix: test membership on the resolved self.score_func_names —
        # the original tested `in score_func_names`, which raises TypeError
        # when the caller passes None. Leftover debug prints removed.
        names = self.score_func_names
        if ('binding_affinity1' in names) and (len(prot_seqs) == 1):
            binding_affinity1 = BindingAffinity(prot_seqs[0], tokenizer=tokenizer, base_path=base_path, device=device)
            binding_affinity2 = None
        elif ('binding_affinity1' in names) and ('binding_affinity2' in names) and (len(prot_seqs) == 2):
            binding_affinity1 = BindingAffinity(prot_seqs[0], tokenizer=tokenizer, base_path=base_path, device=device)
            binding_affinity2 = BindingAffinity(prot_seqs[1], tokenizer=tokenizer, base_path=base_path, device=device)
        else:
            binding_affinity1 = None
            binding_affinity2 = None

        permeability = Permeability(tokenizer=tokenizer, base_path=base_path, device=device, emb_model=emb_model)
        sol = Solubility(tokenizer=tokenizer, base_path=base_path, device=device, emb_model=emb_model)
        nonfouling = Nonfouling(tokenizer=tokenizer, base_path=base_path, device=device, emb_model=emb_model)
        hemo = Hemolysis(tokenizer=tokenizer, base_path=base_path, device=device, emb_model=emb_model)

        self.all_funcs = {'binding_affinity1': binding_affinity1,
                          'binding_affinity2': binding_affinity2,
                          'permeability': permeability,
                          'nonfouling': nonfouling,
                          'solubility': sol,
                          'hemolysis': hemo
                          }

    def forward(self, input_seqs):
        """Evaluate every configured scoring function on the sequences.

        Returns:
            np.ndarray of shape (num_sequences, num_functions), float32.
        """
        scores = [self.all_funcs[name](input_seqs=input_seqs)
                  for name in self.score_func_names]

        # Transpose so rows correspond to sequences, columns to functions.
        return np.asarray(scores, dtype=np.float32).T

    def __call__(self, input_seqs: list):
        return self.forward(input_seqs)
setup.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
# Minimal packaging script for the td3b distribution.
from setuptools import setup, find_packages

setup(
    name="td3b",
    version="0.1.0",
    description="TD3B: Transition-Directed Discrete Diffusion for Allosteric Binder Generation",
    # Auto-discover every package directory containing an __init__.py.
    packages=find_packages(),
    python_requires=">=3.10",
)
td3b/__init__.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ TD3B: Transition-Directed Discrete Diffusion for Binders
3
+ A module extending TR2-D2 with directional allosteric control.
4
+ """
5
+
6
+ from .direction_oracle import DirectionalOracle
7
+ from .td3b_scoring import TD3BRewardFunction, TD3BConfidenceWeighting, create_td3b_reward_function
8
+ from .td3b_losses import ContrastiveLoss, InfoNCELoss, TD3BTotalLoss, extract_embeddings_from_mdlm
9
+ from .td3b_mcts import TD3B_MCTS, create_td3b_mcts
10
+ from .td3b_finetune import td3b_finetune, add_td3b_sampling_to_model
11
+ from .data_utils import TD3BDataset, load_td3b_data
12
+
13
+ __all__ = [
14
+ 'DirectionalOracle',
15
+ 'TD3BRewardFunction',
16
+ 'TD3BConfidenceWeighting',
17
+ 'create_td3b_reward_function',
18
+ 'ContrastiveLoss',
19
+ 'InfoNCELoss',
20
+ 'TD3BTotalLoss',
21
+ 'extract_embeddings_from_mdlm',
22
+ 'TD3B_MCTS',
23
+ 'create_td3b_mcts',
24
+ 'td3b_finetune',
25
+ 'add_td3b_sampling_to_model',
26
+ 'TD3BDataset',
27
+ 'load_td3b_data',
28
+ ]
29
+
30
+ __version__ = '0.1.0'
td3b/data_utils.py ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ TD3B Data Utilities
3
+ Handles loading and preprocessing of TD3B_data.csv for both oracle training and finetuning.
4
+ """
5
+
6
+ import pandas as pd
7
+ import numpy as np
8
+ import torch
9
+ from torch.utils.data import Dataset
10
+ from typing import Dict, List, Optional, Tuple
11
+ import sys
12
+
13
+ try:
14
+ from rdkit import Chem
15
+ except ImportError: # pragma: no cover - rdkit may be optional in some setups
16
+ Chem = None
17
+
18
+ sys.path.append('..')
19
+
20
+ AA_SET = set("ACDEFGHIKLMNPQRSTVWY")
21
+
22
+
23
def is_amino_acid_sequence(seq: str) -> bool:
    """Return True when *seq* is a non-empty string of canonical amino acids.

    Leading/trailing whitespace is stripped and case is ignored before the
    check; non-string or empty inputs are rejected.
    """
    if not isinstance(seq, str) or not seq:
        return False
    normalized = seq.strip().upper()
    # Every residue must belong to the 20-letter canonical alphabet.
    return AA_SET.issuperset(normalized)
28
+
29
+
30
def aa_sequence_to_smiles(seq: str) -> Optional[str]:
    """Convert an amino-acid sequence to a canonical isomeric SMILES string.

    Returns None when RDKit is unavailable, the input is not a valid
    amino-acid sequence, or RDKit fails to parse it.
    """
    if Chem is None:
        return None
    if not is_amino_acid_sequence(seq):
        return None
    try:
        mol = Chem.MolFromSequence(seq)
    except Exception:
        return None
    if mol is None:
        return None
    return Chem.MolToSmiles(mol, isomericSmiles=True)
40
+
41
+
42
def peptide_seq_to_smiles(seq: str) -> str:
    """Best-effort SMILES conversion: fall back to the raw sequence on failure."""
    converted = aa_sequence_to_smiles(seq)
    if converted is None:
        return seq
    return converted
45
+
46
+
47
def smiles_token_length(smiles: str, tokenizer) -> int:
    """Number of tokens the tokenizer produces for *smiles*.

    Falls back to the plain character count when no tokenizer is supplied.
    """
    if tokenizer is None:
        return len(smiles)
    encoded = tokenizer(smiles, return_tensors="pt")["input_ids"][0]
    return int(encoded.numel())
52
+
53
+
54
class TD3BDataset(Dataset):
    """
    Dataset for TD3B that loads peptide-protein pairs with directional labels.

    Supports both:
    1. Oracle training: uses all pairs for training f_φ
    2. Finetuning: provides target proteins for conditioning during RL

    Expected CSV columns (inferred from usage below — confirm against
    TD3B_data.csv): Ligand_Sequence, Target_Sequence, label, Action,
    Target_UniProt_ID, Ligand_UniProt_ID.
    """

    def __init__(
        self,
        data_path: str,
        mode: str = 'oracle',  # 'oracle' or 'finetune'
        peptide_tokenizer=None,
        protein_tokenizer=None,
        max_peptide_length: int = 200,
        max_protein_length: int = 1000,
        target_protein_id: Optional[str] = None,  # For finetuning mode
        convert_peptide_to_smiles: bool = True,
    ):
        """
        Args:
            data_path: Path to TD3B_data.csv
            mode: 'oracle' for training f_φ, 'finetune' for RL conditioning
            peptide_tokenizer: Tokenizer for peptide sequences (SMILES)
            protein_tokenizer: Tokenizer for protein sequences (ESM-2)
            max_peptide_length: Maximum peptide sequence length (tokens)
            max_protein_length: Maximum protein sequence length (residues)
            target_protein_id: UniProt ID for target protein (finetuning mode)
            convert_peptide_to_smiles: if True, __getitem__ converts peptides
                to SMILES before tokenization.
        """
        self.mode = mode
        self.data_path = data_path
        self.peptide_tokenizer = peptide_tokenizer
        self.protein_tokenizer = protein_tokenizer
        self.max_peptide_length = max_peptide_length
        self.max_protein_length = max_protein_length
        self.convert_peptide_to_smiles = convert_peptide_to_smiles

        # Load data
        self.data = pd.read_csv(data_path)
        print(f"Loaded {len(self.data)} peptide-protein pairs from {data_path}")

        # Filter by target protein if in finetune mode
        if mode == 'finetune' and target_protein_id is not None:
            self.data = self.data[self.data['Target_UniProt_ID'] == target_protein_id]
            print(f"Filtered to {len(self.data)} pairs for target {target_protein_id}")

        # Process labels: agonist -> +1, antagonist -> -1, neutral -> 0
        self.label_map = {
            'agonist': 1.0,
            'antagonist': -1.0,
            'neutral': 0.0,
        }

        # Convert action descriptions to numerical labels.
        # NOTE(review): labels outside label_map silently become NaN here —
        # verify the CSV only ever contains the three mapped values.
        self.data['numeric_label'] = self.data['label'].map(self.label_map)

        # Assign confidence based on action description
        # (assumes 'Action' is a non-null string column — .lower() would
        # raise on NaN; confirm against the CSV).
        self.data['confidence'] = self.data['Action'].apply(self._action_to_confidence)

    def _action_to_confidence(self, action: str) -> float:
        """
        Convert action description to confidence score.

        Full agonist/antagonist: 1.0
        Partial/Weak: 0.7
        Others: 0.5
        """
        action_lower = action.lower()

        if 'full' in action_lower:
            return 1.0
        elif 'partial' in action_lower or 'weak' in action_lower:
            return 0.7
        elif 'slows' in action_lower or 'modulator' in action_lower:
            return 0.5
        else:
            return 0.8  # Default for unspecified agonist/antagonist

    def __len__(self) -> int:
        """Number of peptide-protein pairs after any finetune filtering."""
        return len(self.data)

    def __getitem__(self, idx: int) -> Dict:
        """Return one pair as a dict of raw strings, token tensors and labels."""
        row = self.data.iloc[idx]

        # Get sequences
        peptide_seq = row['Ligand_Sequence']
        protein_seq = row['Target_Sequence']
        peptide_smiles = self._peptide_to_smiles(peptide_seq)
        peptide_smiles_length = smiles_token_length(peptide_smiles, self.peptide_tokenizer)

        # Tokenize (placeholder - actual tokenization depends on mode)
        if self.peptide_tokenizer is not None:
            peptide_tokens = self._tokenize_peptide(peptide_smiles)
        else:
            # No tokenizer: emit an all-zero placeholder of fixed length.
            peptide_tokens = torch.zeros(self.max_peptide_length, dtype=torch.long)

        if self.protein_tokenizer is not None:
            protein_tokens = self._tokenize_protein(protein_seq)
        else:
            protein_tokens = self._tokenize_protein_placeholder(protein_seq)

        # Get label and confidence
        label = torch.tensor(row['numeric_label'], dtype=torch.float32)
        confidence = torch.tensor(row['confidence'], dtype=torch.float32)

        return {
            'peptide_seq': peptide_seq,
            'peptide_smiles': peptide_smiles,
            'peptide_smiles_length': peptide_smiles_length,
            'protein_seq': protein_seq,
            'peptide_tokens': peptide_tokens,
            'protein_tokens': protein_tokens,
            'label': label,
            'confidence': confidence,
            'target_id': row['Target_UniProt_ID'],
            'ligand_id': row['Ligand_UniProt_ID'],
            'action': row['Action']
        }

    def _peptide_to_smiles(self, peptide_seq: str) -> str:
        """Convert a peptide to SMILES unless conversion is disabled."""
        if not self.convert_peptide_to_smiles:
            return peptide_seq
        return peptide_seq_to_smiles(peptide_seq)

    def _tokenize_peptide(self, peptide_seq: str) -> torch.Tensor:
        """Tokenize peptide sequence using provided tokenizer.

        Pads/truncates to ``self.max_peptide_length`` and returns a 1-D
        input_ids tensor.
        """
        tokens = self.peptide_tokenizer(
            peptide_seq,
            return_tensors='pt',
            padding='max_length',
            max_length=self.max_peptide_length,
            truncation=True
        )['input_ids'].squeeze(0)
        return tokens

    def _tokenize_protein_placeholder(self, protein_seq: str) -> torch.Tensor:
        """
        Placeholder protein tokenizer (character-level).

        NOTE: Replace with ESM-2 tokenizer in production:
            from esm import pretrained
            _, alphabet = pretrained.esm2_t33_650M_UR50D()
            batch_converter = alphabet.get_batch_converter()
            _, _, tokens = batch_converter([("protein", protein_seq)])
        """
        # Amino acid to index mapping: 1..20 for residues, 0 = PAD, 21 = UNK.
        aa_to_idx = {aa: i+1 for i, aa in enumerate('ACDEFGHIKLMNPQRSTVWY')}
        aa_to_idx['<PAD>'] = 0
        aa_to_idx['<UNK>'] = 21

        # Convert to indices
        indices = [aa_to_idx.get(aa, aa_to_idx['<UNK>']) for aa in protein_seq]

        # Pad or truncate to the fixed protein length.
        if len(indices) > self.max_protein_length:
            indices = indices[:self.max_protein_length]
        else:
            indices += [0] * (self.max_protein_length - len(indices))

        return torch.tensor(indices, dtype=torch.long)

    def _tokenize_protein(self, protein_seq: str) -> torch.Tensor:
        """Tokenize protein using ESM-2 tokenizer if available.

        Currently always delegates to the placeholder tokenizer.
        """
        if self.protein_tokenizer is None:
            return self._tokenize_protein_placeholder(protein_seq)

        # Use ESM-2 tokenizer
        # TODO: Implement when ESM-2 is integrated
        return self._tokenize_protein_placeholder(protein_seq)

    def get_target_proteins(self) -> Dict[str, str]:
        """
        Get dictionary of unique target proteins.

        Returns:
            dict: {UniProt_ID: Sequence}
        """
        unique_targets = self.data.drop_duplicates(subset=['Target_UniProt_ID'])
        return dict(zip(unique_targets['Target_UniProt_ID'], unique_targets['Target_Sequence']))

    def get_ligands_for_target(self, target_id: str) -> List[Dict]:
        """
        Get all ligands (peptides) for a specific target protein.

        Args:
            target_id: Target protein UniProt ID

        Returns:
            List of dicts with ligand info
        """
        target_data = self.data[self.data['Target_UniProt_ID'] == target_id]

        ligands = []
        for _, row in target_data.iterrows():
            ligands.append({
                'sequence': row['Ligand_Sequence'],
                'uniprot_id': row['Ligand_UniProt_ID'],
                'label': row['numeric_label'],
                'confidence': row['confidence'],
                'action': row['Action']
            })

        return ligands
258
+
259
+
260
def load_td3b_data(
    data_path: str,
    mode: str = 'oracle',
    target_protein_id: Optional[str] = None
) -> Tuple[pd.DataFrame, Dict]:
    """
    Load and summarize TD3B data.

    Args:
        data_path: Path to TD3B_data.csv
        mode: 'oracle' or 'finetune'
        target_protein_id: Filter by target protein (finetuning mode)

    Returns:
        data: Filtered DataFrame
        stats: Dictionary of statistics
    """
    frame = pd.read_csv(data_path)

    # Restrict to a single target when finetuning against one protein.
    if mode == 'finetune' and target_protein_id is not None:
        frame = frame[frame['Target_UniProt_ID'] == target_protein_id]

    # Summary statistics over the (possibly filtered) pairs.
    labels = frame['label']
    stats = {
        'total_pairs': len(frame),
        'unique_targets': frame['Target_UniProt_ID'].nunique(),
        'unique_ligands': frame['Ligand_UniProt_ID'].nunique(),
        'agonist_count': (labels == 'agonist').sum(),
        'antagonist_count': (labels == 'antagonist').sum(),
        'action_distribution': frame['Action'].value_counts().to_dict()
    }

    return frame, stats
294
+
295
+
296
def create_target_dataset_for_finetuning(
    data_path: str,
    target_protein_id: str,
    desired_direction: str = 'agonist'
) -> Dict:
    """
    Create a dataset for TD3B finetuning focused on a specific target.

    Args:
        data_path: Path to TD3B_data.csv
        target_protein_id: Target protein UniProt ID
        desired_direction: 'agonist' or 'antagonist'

    Returns:
        dict with target protein info and example ligands for the desired
        direction plus the opposite direction (for contrastive learning).

    Raises:
        ValueError: if desired_direction is not 'agonist'/'antagonist', or
            if the target has no rows in the CSV.
    """
    # Validate up front. The previous implementation routed this through an
    # identity dict ({'agonist': 'agonist', ...}) whose only effect was to
    # raise an opaque KeyError for any other value — after the file was read.
    if desired_direction not in ('agonist', 'antagonist'):
        raise ValueError(
            f"desired_direction must be 'agonist' or 'antagonist', got {desired_direction!r}"
        )

    data = pd.read_csv(data_path)

    # Get target protein info
    target_data = data[data['Target_UniProt_ID'] == target_protein_id]

    if len(target_data) == 0:
        raise ValueError(f"No data found for target {target_protein_id}")

    # Get protein sequence (should be same for all rows)
    protein_seq = target_data.iloc[0]['Target_Sequence']

    # Ligands labeled with the desired direction.
    direction_ligands = target_data[target_data['label'] == desired_direction]

    # Also get opposite direction for contrastive learning
    opposite_direction = 'antagonist' if desired_direction == 'agonist' else 'agonist'
    opposite_ligands = target_data[target_data['label'] == opposite_direction]

    return {
        'target_protein_id': target_protein_id,
        'target_protein_seq': protein_seq,
        'desired_direction': desired_direction,
        'n_desired_examples': len(direction_ligands),
        'n_opposite_examples': len(opposite_ligands),
        'desired_ligands': direction_ligands[['Ligand_Sequence', 'Action', 'Ligand_UniProt_ID']].to_dict('records'),
        'opposite_ligands': opposite_ligands[['Ligand_Sequence', 'Action', 'Ligand_UniProt_ID']].to_dict('records')
    }
340
+
341
+
342
if __name__ == "__main__":
    # Example usage / smoke test: expects TD3B_data.csv one directory up.
    data_path = "../TD3B_data.csv"

    print("=" * 80)
    print("TD3B Data Loading Example")
    print("=" * 80)

    # Load and summarize data
    data, stats = load_td3b_data(data_path, mode='oracle')

    print("\nDataset Statistics:")
    for key, value in stats.items():
        print(f" {key}: {value}")

    # Create dataset for oracle training
    print("\n" + "=" * 80)
    print("Oracle Training Dataset")
    print("=" * 80)

    dataset = TD3BDataset(data_path, mode='oracle')
    print(f"Dataset size: {len(dataset)}")

    # Sample first item to sanity-check tokenization and labels.
    sample = dataset[0]
    print(f"\nSample item:")
    print(f" Target: {sample['target_id']}")
    print(f" Ligand: {sample['ligand_id']}")
    print(f" Label: {sample['label'].item()}")
    print(f" Confidence: {sample['confidence'].item()}")
    print(f" Action: {sample['action']}")

    # Create finetuning dataset for a specific target
    print("\n" + "=" * 80)
    print("Finetuning Dataset Example")
    print("=" * 80)

    # Get first target from the oracle dataset as a demo target.
    targets = dataset.get_target_proteins()
    first_target_id = list(targets.keys())[0]

    finetune_info = create_target_dataset_for_finetuning(
        data_path,
        first_target_id,
        desired_direction='agonist'
    )

    print(f"\nTarget: {finetune_info['target_protein_id']}")
    print(f"Desired direction: {finetune_info['desired_direction']}")
    print(f"Number of agonist examples: {finetune_info['n_desired_examples']}")
    print(f"Number of antagonist examples: {finetune_info['n_opposite_examples']}")
td3b/direction_oracle.py ADDED
@@ -0,0 +1,709 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ GPCR Agonist Classifier - TR2-D2 Inference Script
4
+ """
5
+
6
+ import argparse
7
+ import logging
8
+ import os
9
+ import sys
10
+ from types import SimpleNamespace
11
+ from typing import Dict, List, Optional, Tuple
12
+
13
+ import torch
14
+ import torch.nn as nn
15
+ import torch.nn.functional as F
16
+ from transformers import EsmModel, EsmTokenizer
17
+
18
+ PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
19
+ if PROJECT_ROOT not in sys.path:
20
+ sys.path.insert(0, PROJECT_ROOT)
21
+
22
+ from tokenizer.my_tokenizers import SMILES_SPE_Tokenizer
23
+ from roformer import Roformer
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
def resolve_device(requested: Optional[str]) -> torch.device:
    """Resolve a user-requested device string to a usable torch.device.

    Args:
        requested: a device spec ("cpu", "cuda", "cuda:1", ...), "auto",
            or None. None/"auto" pick cuda:0 when CUDA is available,
            otherwise CPU.

    Returns:
        A torch.device valid on this machine. Invalid or unavailable
        requests fall back to CPU (or cuda:0 for an out-of-range CUDA
        index), with a warning logged.
    """
    # Auto-detection: prefer the first CUDA device when one is visible.
    if requested is None or str(requested).lower() == "auto":
        if torch.cuda.is_available() and torch.cuda.device_count() > 0:
            return torch.device("cuda:0")
        return torch.device("cpu")

    try:
        device = torch.device(requested)
    except Exception as exc:
        logger.warning("Invalid device '%s': %s. Falling back to CPU.", requested, exc)
        return torch.device("cpu")

    # Non-CUDA devices (cpu, mps, ...) need no further validation here.
    if device.type != "cuda":
        return device

    if not torch.cuda.is_available() or torch.cuda.device_count() == 0:
        logger.warning("CUDA requested but not available; falling back to CPU")
        return torch.device("cpu")

    # A bare "cuda" request has index None; default to device 0. From here
    # index is always an int, so the original `index is None` re-check was
    # dead code and has been removed.
    index = device.index if device.index is not None else 0
    count = torch.cuda.device_count()
    if not 0 <= index < count:
        logger.warning(
            "CUDA device %s requested but only %d visible; using cuda:0",
            index,
            count
        )
        return torch.device("cuda:0")

    return torch.device(f"cuda:{index}")
57
+
58
+ # -------------------------
59
+ # Peptide to SMILES
60
+ # -------------------------
61
def peptide_to_smiles(seq: str) -> str:
    """Convert a peptide amino-acid sequence to SMILES via RDKit.

    The sequence is stripped and upper-cased before conversion.

    Raises:
        ValueError: if RDKit cannot parse the sequence.
    """
    from rdkit import Chem

    cleaned = seq.strip().upper()
    mol = Chem.MolFromSequence(cleaned)
    if mol is None:
        raise ValueError(f"RDKit failed to convert peptide '{cleaned}' to SMILES")
    return Chem.MolToSmiles(mol)
68
+
69
+ # -------------------------
70
+ # Self-Attention Block
71
+ # -------------------------
72
class SelfAttentionBlock(nn.Module):
    """Residual self-attention layer with post-norm (attn -> dropout -> add -> LN).

    Submodule attribute names are kept stable so saved state dicts remain
    loadable.
    """

    def __init__(self, d_model, n_heads, dropout=0.1):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(d_model, n_heads, dropout, batch_first=True)
        self.norm = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, key_padding_mask=None):
        """Apply self-attention over x (batch, seq, d_model); True in
        key_padding_mask marks positions to ignore."""
        attended, _ = self.self_attn(x, x, x, key_padding_mask=key_padding_mask)
        return self.norm(x + self.dropout(attended))
83
+
84
+ # -------------------------
85
+ # Cross-Attention Module
86
+ # -------------------------
87
class BiMultiHeadCrossAttention(nn.Module):
    """Bidirectional cross-attention between protein and ligand streams.

    Each stream attends over the other, with a residual connection and
    LayerNorm per stream. Attribute names are kept stable for checkpoint
    compatibility.
    """

    def __init__(self, d_model, n_heads, dropout=0.1):
        super().__init__()
        self.prot_to_lig = nn.MultiheadAttention(d_model, n_heads, dropout, batch_first=True)
        self.lig_to_prot = nn.MultiheadAttention(d_model, n_heads, dropout, batch_first=True)
        self.prot_ln = nn.LayerNorm(d_model)
        self.lig_ln = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, prot_h, lig_h, prot_kpm=None, lig_kpm=None):
        """Return updated (protein, ligand) hidden states; *_kpm masks use
        True for padded positions."""
        # Protein queries attend over ligand keys/values.
        prot_attended, _ = self.prot_to_lig(prot_h, lig_h, lig_h, key_padding_mask=lig_kpm)
        prot_updated = self.prot_ln(prot_h + self.dropout(prot_attended))

        # Ligand queries attend over protein keys/values.
        lig_attended, _ = self.lig_to_prot(lig_h, prot_h, prot_h, key_padding_mask=prot_kpm)
        lig_updated = self.lig_ln(lig_h + self.dropout(lig_attended))

        return prot_updated, lig_updated
104
+
105
+ # -------------------------
106
+ # TR2-D2 Encoder Wrapper
107
+ # -------------------------
108
class TR2D2RoFormerEncoder(nn.Module):
    """Frozen TR2-D2 RoFormer wrapper used as the ligand encoder.

    Optionally loads a (filtered) TR2-D2 checkpoint into the underlying
    RoFormer, then freezes every parameter; forward() returns per-token
    hidden states.
    """

    def __init__(self, config, tokenizer, checkpoint_path=None, device="cpu"):
        super().__init__()
        self.device = device
        self.encoder = Roformer(config, tokenizer, device=device)

        if checkpoint_path:
            print(f" Loading TR2-D2 checkpoint...")
            # weights_only=False: the checkpoint may contain pickled
            # non-tensor objects — only load trusted checkpoints.
            ckpt = torch.load(checkpoint_path, map_location=device, weights_only=False)
            # Lightning-style checkpoints nest weights under "state_dict".
            state_dict = ckpt.get("state_dict", ckpt)
            # Keep encoder-related keys only, stripping wrapper prefixes so
            # they line up with self.encoder.model's parameter names.
            roformer_state = {
                k.replace("model.", "").replace("backbone.", ""): v
                for k, v in state_dict.items()
                if "roformer" in k or "encoder" in k or "backbone" in k
            }
            # strict=False: partial overlap with the checkpoint is expected.
            self.encoder.model.load_state_dict(roformer_state, strict=False)
            print(" TR2-D2 checkpoint loaded")

        # Freeze: the encoder is used for feature extraction only.
        for p in self.encoder.parameters():
            p.requires_grad = False
        self.encoder.eval()

    def forward(self, input_ids, attention_mask, inputs_embeds=None):
        """Return last-layer hidden states for ligand tokens.

        Exactly one of input_ids / inputs_embeds is used. The embeddings
        path runs WITHOUT torch.no_grad() so gradients can flow back into
        the provided embeddings; the token-id path is wrapped in no_grad.
        """
        if attention_mask is None:
            raise ValueError("attention_mask is required for ligand encoding.")
        attention_mask = attention_mask.to(self.device)
        if inputs_embeds is not None:
            inputs_embeds = inputs_embeds.to(self.device)
            out = self.encoder.model.roformer(
                inputs_embeds=inputs_embeds,
                attention_mask=attention_mask
            )
        else:
            input_ids = input_ids.to(self.device)
            with torch.no_grad():
                out = self.encoder.model.roformer(
                    input_ids=input_ids,
                    attention_mask=attention_mask
                )
        return out.last_hidden_state
148
+
149
+ # -------------------------
150
+ # Full GPCR Model
151
+ # -------------------------
152
class ESM_TR2D2_GPCRClassifier(nn.Module):
    """
    GPCR Agonist Classifier with TR2-D2

    Architecture:
        1. ESM2 (protein) + TR2-D2 RoFormer (ligand) — both frozen
        2. Projections to common dimension
        3. Self-Attention (1 layer each)
        4. BiDirectional Cross-Attention (2 stacked layers)
        5. Masked Average Pooling
        6. MLP Classifier (2 output logits)
    """
    def __init__(
        self,
        esm_name,
        tr2d2_config,
        lig_tokenizer,
        tr2d2_checkpoint=None,
        d_model=256,
        n_heads=4,
        n_self_attn_layers=1,
        n_bmca_layers=2,
        dropout=0.3,
        device="cuda",
        esm_cache_dir=None,
        esm_local_files_only=False
    ):
        super().__init__()
        self.device = device

        # Frozen encoders: only the projections, attention layers and the
        # classifier head are trainable.
        print("Loading ESM2 protein encoder...")
        self.esm = EsmModel.from_pretrained(
            esm_name,
            cache_dir=esm_cache_dir,
            local_files_only=esm_local_files_only
        )
        for p in self.esm.parameters():
            p.requires_grad = False
        self.esm.eval()

        print("Loading TR2-D2 ligand encoder...")
        self.ligand_encoder = TR2D2RoFormerEncoder(
            tr2d2_config, lig_tokenizer, tr2d2_checkpoint, device
        )

        esm_dim = self.esm.config.hidden_size
        lig_dim = tr2d2_config.roformer.hidden_size

        # Project both encoder outputs into a shared d_model space.
        self.prot_proj = nn.Linear(esm_dim, d_model)
        self.lig_proj = nn.Linear(lig_dim, d_model)

        # Per-stream self-attention stacks.
        self.prot_self_attn_layers = nn.ModuleList([
            SelfAttentionBlock(d_model, n_heads, dropout)
            for _ in range(n_self_attn_layers)
        ])
        self.lig_self_attn_layers = nn.ModuleList([
            SelfAttentionBlock(d_model, n_heads, dropout)
            for _ in range(n_self_attn_layers)
        ])

        # Stacked bidirectional cross-attention between the two streams.
        self.bmca_layers = nn.ModuleList([
            BiMultiHeadCrossAttention(d_model, n_heads, dropout)
            for _ in range(n_bmca_layers)
        ])

        # MLP head over concatenated pooled representations; 2 logits
        # (presumably agonist vs. not — confirm against training labels).
        self.classifier = nn.Sequential(
            nn.Linear(2 * d_model, d_model),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(d_model, 2)
        )

    def forward(self, prot_tokens, lig_tokens, lig_inputs_embeds=None):
        """Classify a (protein, ligand) pair; returns (batch, 2) logits.

        prot_tokens / lig_tokens are dicts with "input_ids" and
        "attention_mask"; lig_inputs_embeds optionally replaces ligand
        token ids with differentiable embeddings.
        """
        # key_padding_mask convention: True marks PAD positions to ignore.
        prot_kpm = prot_tokens["attention_mask"].eq(0)
        lig_kpm = lig_tokens["attention_mask"].eq(0)

        # Protein features are always computed without gradients (frozen).
        with torch.no_grad():
            prot_out = self.esm(**prot_tokens).last_hidden_state

        lig_out = self.ligand_encoder(
            lig_tokens["input_ids"],
            lig_tokens["attention_mask"],
            inputs_embeds=lig_inputs_embeds
        )

        prot_h = self.prot_proj(prot_out)
        lig_h = self.lig_proj(lig_out)

        # Self-attention within each stream.
        for self_attn in self.prot_self_attn_layers:
            prot_h = self_attn(prot_h, key_padding_mask=prot_kpm)
        for self_attn in self.lig_self_attn_layers:
            lig_h = self_attn(lig_h, key_padding_mask=lig_kpm)

        # Cross-attention (stacked n_bmca_layers times).
        for bmca in self.bmca_layers:
            prot_h, lig_h = bmca(prot_h, lig_h, prot_kpm=prot_kpm, lig_kpm=lig_kpm)

        # Masked average pooling: average only over non-pad positions;
        # clamp avoids division by zero for fully padded rows.
        prot_mask = prot_tokens["attention_mask"].unsqueeze(-1)
        lig_mask = lig_tokens["attention_mask"].unsqueeze(-1)

        prot_repr = (prot_h * prot_mask).sum(dim=1) / prot_mask.sum(dim=1).clamp(min=1)
        lig_repr = (lig_h * lig_mask).sum(dim=1) / lig_mask.sum(dim=1).clamp(min=1)

        return self.classifier(torch.cat([prot_repr, lig_repr], dim=-1))
262
+
263
+ # -------------------------
264
+ # Tokenization
265
+ # -------------------------
266
def create_tr2d2_config(vocab_size):
    """Build the minimal TR2-D2 RoFormer config namespace for *vocab_size*.

    The fixed sizes must match the pretrained TR2-D2 checkpoint.
    """
    roformer_cfg = SimpleNamespace(
        vocab_size=vocab_size,
        hidden_size=768,
        n_layers=8,
        n_heads=8,
        max_position_embeddings=1035
    )
    return SimpleNamespace(roformer=roformer_cfg)
276
+
277
+
278
def _load_state_dict_flexible(model: nn.Module, state_dict: Dict, strict: bool = True) -> None:
    """Load *state_dict* into *model*, tolerating key mismatches.

    First attempts a normal (by default strict) load; if that raises, retries
    with only the keys that exist in the model and strict=False, logging what
    was filtered out and what remained missing/unexpected.
    """
    try:
        model.load_state_dict(state_dict, strict=strict)
        return
    except RuntimeError as exc:
        # Strict load failed (missing/unexpected keys or shape mismatch):
        # fall back to loading only the intersection of keys, non-strictly.
        model_keys = set(model.state_dict().keys())
        filtered = {k: v for k, v in state_dict.items() if k in model_keys}
        logger.warning("Strict load failed: %s", exc)
        logger.warning(
            "Retrying with filtered keys (%d/%d) and strict=False",
            len(filtered),
            len(state_dict)
        )
        incompatible = model.load_state_dict(filtered, strict=False)
        if incompatible.missing_keys:
            logger.warning("Missing keys (first 10): %s", incompatible.missing_keys[:10])
        if incompatible.unexpected_keys:
            logger.warning("Unexpected keys (first 10): %s", incompatible.unexpected_keys[:10])
296
+
297
def tokenize_protein(seq, tokenizer, device):
    """Tokenize a protein sequence (truncated to 1024 tokens) and move every
    returned tensor onto *device*."""
    encoded = tokenizer(
        seq,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=1024,
        add_special_tokens=True
    )
    return {key: tensor.to(device) for key, tensor in encoded.items()}
307
+
308
def tokenize_ligand(smiles, tokenizer, max_len, device):
    """Tokenize a ligand SMILES string and right-pad to exactly *max_len*.

    Args:
        smiles: SMILES string of the ligand.
        tokenizer: tokenizer exposing __call__ and pad_token_id.
        max_len: fixed output length (sequences are truncated or padded to it).
        device: target device for the returned tensors.

    Returns:
        dict with "input_ids" and "attention_mask", each of shape (1, max_len).
    """
    enc = tokenizer(
        smiles,
        return_tensors="pt",
        truncation=True,
        max_length=max_len,
        add_special_tokens=True
    )
    ids = enc["input_ids"].squeeze(0)
    att = enc["attention_mask"].squeeze(0)

    pad = max_len - ids.numel()
    if pad > 0:
        # Preserve the tokenizer's integer dtypes. The previous code padded
        # the attention mask with torch.zeros(pad) (float32), and torch.cat's
        # type promotion silently converted the whole mask to float.
        ids = torch.cat([ids, torch.full((pad,), tokenizer.pad_token_id, dtype=ids.dtype)])
        att = torch.cat([att, torch.zeros(pad, dtype=att.dtype)])

    return {
        "input_ids": ids.unsqueeze(0).to(device),
        "attention_mask": att.unsqueeze(0).to(device)
    }
328
+
329
+ # -------------------------
330
+ # Training-Compatible Oracle Wrapper
331
+ # -------------------------
332
+ class DirectionalOracle(nn.Module):
333
+ """
334
+ Batch-capable oracle wrapper with TD3B-compatible predict_with_confidence().
335
+
336
+ This class is intended for training integration where peptide/protein tokens
337
+ are provided directly (batched) and the oracle runs in inference-only mode.
338
+ """
339
def __init__(
    self,
    model_ckpt: str,
    tr2d2_checkpoint: str,
    tokenizer_vocab: str,
    tokenizer_splits: str,
    esm_name: str = "facebook/esm2_t33_650M_UR50D",
    d_model: int = 256,
    n_heads: int = 4,
    n_self_attn_layers: int = 1,
    n_bmca_layers: int = 2,
    dropout: float = 0.3,
    max_ligand_length: int = 768,
    max_protein_length: int = 1024,
    device: Optional[str] = None,
    esm_cache_dir: Optional[str] = None,
    esm_local_files_only: bool = False
):
    """Build the frozen, inference-only oracle.

    Args:
        model_ckpt: path to the trained classifier weights.
        tr2d2_checkpoint: path to the TR2-D2 ligand-encoder checkpoint.
        tokenizer_vocab / tokenizer_splits: SMILES SPE tokenizer files.
        esm_name: HuggingFace id of the ESM-2 protein encoder.
        d_model..dropout: classifier hyperparameters (must match training).
        max_ligand_length / max_protein_length: hard truncation limits.
        device: device spec or None/"auto" for auto-detection.
    """
    super().__init__()

    if isinstance(device, torch.device):
        device = str(device)
    self.device = resolve_device(device)

    self.max_ligand_length = max_ligand_length
    self.max_protein_length = max_protein_length
    # Truncation warnings are emitted once per input kind, then suppressed.
    self._warned_ligand_truncation = False
    self._warned_protein_truncation = False

    self.lig_tokenizer = SMILES_SPE_Tokenizer(tokenizer_vocab, tokenizer_splits)
    self.prot_tokenizer = EsmTokenizer.from_pretrained(
        esm_name,
        cache_dir=esm_cache_dir,
        local_files_only=esm_local_files_only
    )

    tr2d2_cfg = create_tr2d2_config(self.lig_tokenizer.vocab_size)
    self.model = ESM_TR2D2_GPCRClassifier(
        esm_name=esm_name,
        tr2d2_config=tr2d2_cfg,
        lig_tokenizer=self.lig_tokenizer,
        tr2d2_checkpoint=tr2d2_checkpoint,
        d_model=d_model,
        n_heads=n_heads,
        n_self_attn_layers=n_self_attn_layers,
        n_bmca_layers=n_bmca_layers,
        dropout=dropout,
        device=self.device,
        esm_cache_dir=esm_cache_dir,
        esm_local_files_only=esm_local_files_only
    )

    # weights_only=False: checkpoint may hold pickled objects — load only
    # trusted files. Unwrap trainer-style {"model_state_dict": ...} dicts.
    state_dict = torch.load(model_ckpt, map_location=self.device, weights_only=False)
    if isinstance(state_dict, dict) and "model_state_dict" in state_dict:
        state_dict = state_dict["model_state_dict"]
    _load_state_dict_flexible(self.model, state_dict, strict=True)
    self.model.to(self.device).eval()

    # Inference-only: freeze everything.
    for param in self.model.parameters():
        param.requires_grad = False

    # Cache pad ids; fall back to 0 when a tokenizer defines none.
    self._lig_pad_token_id = self.lig_tokenizer.pad_token_id
    if self._lig_pad_token_id is None:
        self._lig_pad_token_id = 0
    self._prot_pad_token_id = self.prot_tokenizer.pad_token_id
    if self._prot_pad_token_id is None:
        self._prot_pad_token_id = 0
406
+
407
def encode_protein(self, protein_seq: str) -> torch.Tensor:
    """Tokenize a protein sequence and return its input_ids on self.device.

    Only input_ids are returned; the attention mask is reconstructed later
    from pad tokens in _normalize_token_dict.
    """
    tokens = self.prot_tokenizer(
        protein_seq,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=self.max_protein_length,
        add_special_tokens=True
    )
    return tokens["input_ids"].to(self.device)
417
+
418
def _normalize_token_dict(
    self,
    tokens: torch.Tensor,
    pad_token_id: int,
    max_length: int,
    warned_attr: str
) -> Dict[str, torch.Tensor]:
    """Normalize token input into a batched {input_ids, attention_mask} dict.

    Accepts either a dict (with optional attention_mask) or a raw 1-D/2-D
    id tensor; a missing mask is derived as (ids != pad_token_id). Inputs
    longer than *max_length* are truncated, warning once per *warned_attr*
    flag. All tensors are moved to self.device.
    """
    if isinstance(tokens, dict):
        input_ids = tokens.get("input_ids")
        if input_ids is None:
            raise ValueError("Token dict must include input_ids.")
        attention_mask = tokens.get("attention_mask")
        input_ids = input_ids.to(self.device)
        if attention_mask is None:
            # Derive the mask from pad positions when none is supplied.
            attention_mask = (input_ids != pad_token_id).long()
        else:
            attention_mask = attention_mask.to(self.device)
    else:
        input_ids = tokens
        if input_ids.dim() == 1:
            # Promote a single sequence to a batch of one.
            input_ids = input_ids.unsqueeze(0)
        input_ids = input_ids.to(self.device)
        attention_mask = (input_ids != pad_token_id).long()

    if max_length is not None and input_ids.size(1) > max_length:
        # Warn only on the first truncation for this input kind.
        if not getattr(self, warned_attr):
            logger.warning(
                "Truncating input from length %d to max_length=%d",
                input_ids.size(1),
                max_length
            )
            setattr(self, warned_attr, True)
        input_ids = input_ids[:, :max_length]
        attention_mask = attention_mask[:, :max_length]

    return {"input_ids": input_ids, "attention_mask": attention_mask}
454
+
455
+ def _normalize_prob_inputs(
456
+ self,
457
+ probs: torch.Tensor,
458
+ attention_mask: Optional[torch.Tensor],
459
+ max_length: int,
460
+ warned_attr: str,
461
+ ) -> Tuple[torch.Tensor, torch.Tensor]:
462
+ if probs.dim() == 2:
463
+ probs = probs.unsqueeze(0)
464
+ probs = probs.to(self.device)
465
+ if attention_mask is None:
466
+ attention_mask = torch.ones(
467
+ probs.size(0), probs.size(1), device=self.device, dtype=torch.long
468
+ )
469
+ else:
470
+ if attention_mask.dim() == 1:
471
+ attention_mask = attention_mask.unsqueeze(0)
472
+ attention_mask = attention_mask.to(self.device).long()
473
+
474
+ if max_length is not None and probs.size(1) > max_length:
475
+ if not getattr(self, warned_attr):
476
+ logger.warning(
477
+ "Truncating input from length %d to max_length=%d",
478
+ probs.size(1),
479
+ max_length
480
+ )
481
+ setattr(self, warned_attr, True)
482
+ probs = probs[:, :max_length]
483
+ attention_mask = attention_mask[:, :max_length]
484
+
485
+ return probs, attention_mask
486
+
487
@torch.no_grad()
def predict_with_confidence(
    self,
    peptide_tokens: torch.Tensor,
    protein_tokens: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Run the oracle on tokenized inputs and return (p_agonist, confidence).

    A single protein is broadcast across a peptide batch; otherwise batch
    sizes must match. ``p_agonist`` is the softmax probability of class 1,
    and ``confidence`` is the larger of the two class probabilities.
    """
    peptide = self._normalize_token_dict(
        peptide_tokens,
        self._lig_pad_token_id,
        self.max_ligand_length,
        "_warned_ligand_truncation"
    )
    protein = self._normalize_token_dict(
        protein_tokens,
        self._prot_pad_token_id,
        self.max_protein_length,
        "_warned_protein_truncation"
    )

    n_pep = peptide["input_ids"].size(0)
    n_prot = protein["input_ids"].size(0)
    if n_prot == 1 and n_pep > 1:
        # One protein, many peptides: broadcast the protein across the batch.
        protein = {key: value.expand(n_pep, -1) for key, value in protein.items()}
    elif n_prot != n_pep:
        raise ValueError(
            f"Batch size mismatch: peptide_tokens={n_pep}, protein_tokens={n_prot}"
        )

    class_probs = F.softmax(self.model(protein, peptide), dim=-1)
    p_agonist = class_probs[:, 1]
    confidence = torch.max(class_probs, dim=-1).values
    return p_agonist, confidence
520
+
521
def predict_from_probs(
    self,
    ligand_probs: torch.Tensor,
    protein_tokens: torch.Tensor,
    ligand_attention_mask: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    """Differentiable agonist prediction from ligand token *probabilities*.

    Instead of discrete ids, the ligand is given as a distribution over the
    vocabulary; the expected embedding ``probs @ E`` is fed to the oracle via
    ``lig_inputs_embeds`` so gradients can flow back into ``ligand_probs``.
    Note: not wrapped in ``torch.no_grad`` — presumably intentional so this
    can be used inside a training objective (TODO confirm with callers).

    Args:
        ligand_probs: (B, L, V) or unbatched (L, V) probability tensor over
            the ligand vocabulary.
        protein_tokens: Protein ids, tensor or HF-style dict.
        ligand_attention_mask: Optional mask; defaults to all-ones.

    Returns:
        Tensor of agonist probabilities (softmax class 1), shape (B,).
    """
    lig_probs, lig_attention = self._normalize_prob_inputs(
        ligand_probs,
        ligand_attention_mask,
        self.max_ligand_length,
        "_warned_ligand_truncation",
    )
    prot_tokens = self._normalize_token_dict(
        protein_tokens,
        self._prot_pad_token_id,
        self.max_protein_length,
        "_warned_protein_truncation"
    )

    lig_batch = lig_probs.size(0)
    prot_batch = prot_tokens["input_ids"].size(0)
    if prot_batch == 1 and lig_batch > 1:
        # Broadcast a single protein across the ligand batch.
        prot_tokens = {k: v.expand(lig_batch, -1) for k, v in prot_tokens.items()}
    elif prot_batch != lig_batch:
        raise ValueError(
            f"Batch size mismatch: ligand_probs={lig_batch}, protein_tokens={prot_batch}"
        )

    # Word-embedding matrix (vocab, dim) of the oracle's ligand RoFormer
    # encoder; the soft input is the probability-weighted average embedding.
    emb_weight = self.model.ligand_encoder.encoder.model.roformer.embeddings.word_embeddings.weight
    if lig_probs.size(-1) != emb_weight.size(0):
        raise ValueError(
            f"Ligand vocab mismatch: probs={lig_probs.size(-1)} vs oracle={emb_weight.size(0)}"
        )
    lig_inputs_embeds = lig_probs @ emb_weight
    # Placeholder ids: presumably ignored by the model when
    # ``lig_inputs_embeds`` is provided — TODO confirm against the model API.
    lig_input_ids = torch.zeros(
        lig_probs.size(0), lig_probs.size(1), device=lig_probs.device, dtype=torch.long
    )
    lig_tokens = {"input_ids": lig_input_ids, "attention_mask": lig_attention}
    logits = self.model(prot_tokens, lig_tokens, lig_inputs_embeds=lig_inputs_embeds)
    probs = F.softmax(logits, dim=-1)
    return probs[:, 1]
562
+
563
# -------------------------
# Prediction
# -------------------------
@torch.no_grad()
def predict(model, prot_tok, lig_tok, protein_seq, peptide_seq, device, threshold=0.5,
            max_ligand_length=768):
    """Predict agonist activity for a single protein/peptide pair.

    Args:
        model: Trained classifier returning (1, 2) logits for
            (non-agonist, agonist).
        prot_tok: Protein (ESM) tokenizer.
        lig_tok: Ligand SMILES tokenizer.
        protein_seq: GPCR protein sequence.
        peptide_seq: Ligand peptide sequence (converted to SMILES internally).
        device: Torch device for the tokenized inputs.
        threshold: Decision threshold on the agonist probability.
        max_ligand_length: Maximum ligand token length; must match training.
            Default 768 preserves the previously hard-coded value, but it is
            now a parameter so other model configurations can reuse this
            helper.

    Returns:
        dict with keys: smiles, non_agonist_prob, agonist_prob, prediction,
        confidence
    """
    # Convert peptide to SMILES
    smiles = peptide_to_smiles(peptide_seq)

    # Tokenize (ligand length was hard-coded to 768 with a "FIXED: 768 not
    # 256!" note; parameterized here with the same default)
    prot_tokens = tokenize_protein(protein_seq, prot_tok, device)
    lig_tokens = tokenize_ligand(smiles, lig_tok, max_ligand_length, device)

    # Forward pass: class 0 = non-agonist, class 1 = agonist
    logits = model(prot_tokens, lig_tokens)
    probs = F.softmax(logits, dim=-1).squeeze(0)

    p_non_agonist = probs[0].item()
    p_agonist = probs[1].item()
    prediction = "agonist" if p_agonist >= threshold else "non-agonist"

    return {
        "smiles": smiles,
        "non_agonist_prob": p_non_agonist,
        "agonist_prob": p_agonist,
        "prediction": prediction,
        "confidence": max(p_non_agonist, p_agonist)
    }
596
+
597
# -------------------------
# MAIN
# -------------------------
def main():
    """CLI entry point: load tokenizers and model, then classify one
    protein/peptide pair as agonist vs non-agonist and print the result."""
    parser = argparse.ArgumentParser(
        description="GPCR Agonist Classifier - TR2-D2 Inference"
    )
    parser.add_argument("--model_ckpt", required=True,
                        help="Path to trained model checkpoint")
    parser.add_argument("--tr2d2_checkpoint", required=True,
                        help="Path to TR2-D2 pretrained checkpoint")
    parser.add_argument("--tokenizer_vocab", required=True,
                        help="Path to tokenizer vocabulary")
    parser.add_argument("--tokenizer_splits", required=True,
                        help="Path to tokenizer splits")
    parser.add_argument("--protein_seq", required=True,
                        help="GPCR protein sequence")
    parser.add_argument("--ligand_peptide", required=True,
                        help="Ligand peptide sequence")
    parser.add_argument("--threshold", type=float, default=0.5,
                        help="Classification threshold (default: 0.5)")
    # Architecture hyperparameters: these must mirror the values used when the
    # checkpoint was trained, otherwise state-dict loading will fail.
    parser.add_argument("--d_model", type=int, default=256,
                        help="Hidden dimension (must match training)")
    parser.add_argument("--n_heads", type=int, default=4,
                        help="Number of attention heads (must match training)")
    parser.add_argument("--n_self_attn_layers", type=int, default=1,
                        help="Number of self-attention layers (must match training)")
    parser.add_argument("--n_bmca_layers", type=int, default=2,
                        help="Number of cross-attention layers (must match training)")
    parser.add_argument("--dropout", type=float, default=0.3,
                        help="Dropout rate (must match training)")
    parser.add_argument("--device", default=None,
                        help="Device (cuda/cpu, default: auto)")
    parser.add_argument("--esm_name", default="facebook/esm2_t33_650M_UR50D",
                        help="ESM model name or local path")
    parser.add_argument("--esm_cache_dir", default=None,
                        help="Optional cache directory for ESM model")
    parser.add_argument("--esm_local_files_only", action="store_true",
                        help="Load ESM from local cache only (no network)")

    args = parser.parse_args()

    # Device
    device = resolve_device(args.device)

    print(f"Device: {device}")
    print("")

    # Load tokenizers
    print("Loading tokenizers...")
    prot_tok = EsmTokenizer.from_pretrained(
        args.esm_name,
        cache_dir=args.esm_cache_dir,
        local_files_only=args.esm_local_files_only
    )
    lig_tok = SMILES_SPE_Tokenizer(args.tokenizer_vocab, args.tokenizer_splits)
    print(f" Vocab size: {lig_tok.vocab_size}")
    print("")

    # Create config
    tr2d2_cfg = create_tr2d2_config(lig_tok.vocab_size)

    # Load model
    print("Loading model...")
    model = ESM_TR2D2_GPCRClassifier(
        esm_name=args.esm_name,
        tr2d2_config=tr2d2_cfg,
        lig_tokenizer=lig_tok,
        tr2d2_checkpoint=args.tr2d2_checkpoint,
        d_model=args.d_model,
        n_heads=args.n_heads,
        n_self_attn_layers=args.n_self_attn_layers,
        n_bmca_layers=args.n_bmca_layers,
        dropout=args.dropout,
        device=device,
        esm_cache_dir=args.esm_cache_dir,
        esm_local_files_only=args.esm_local_files_only
    )

    # Load trained weights
    print(" Loading trained weights...")
    state_dict = torch.load(args.model_ckpt, map_location=device)
    _load_state_dict_flexible(model, state_dict, strict=True)
    model.to(device).eval()
    print(" Model ready.")
    print("")

    # Predict
    print("Running inference...")
    result = predict(
        model, prot_tok, lig_tok,
        args.protein_seq, args.ligand_peptide,
        device, args.threshold
    )

    # Display results
    print("")
    print("=" * 70)
    print("RESULTS")
    print("=" * 70)
    print(f"Protein: {args.protein_seq[:50]}{'...' if len(args.protein_seq) > 50 else ''}")
    print(f"Ligand: {args.ligand_peptide}")
    print(f"SMILES: {result['smiles']}")
    print("")
    print(f"Non-agonist probability: {result['non_agonist_prob']:.4f}")
    print(f"Agonist probability: {result['agonist_prob']:.4f}")
    print("")
    print(f"Prediction (threshold={args.threshold}): {result['prediction'].upper()}")
    print(f"Confidence: {result['confidence']:.4f}")
    print("=" * 70)

if __name__ == "__main__":
    main()
td3b/td3b_finetune.py ADDED
@@ -0,0 +1,604 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ TD3B Finetuning Loop
3
+ Extends TR2-D2 training with contrastive loss and directional rewards.
4
+ """
5
+
6
+ import numpy as np
7
+ import torch
8
+ import wandb
9
+ import os
10
+ from finetune_utils import loss_wdce
11
+ from .td3b_losses import TD3BTotalLoss, extract_embeddings_from_mdlm
12
+ from tqdm import tqdm
13
+ import pandas as pd
14
+ from plotting import plot_data_with_distribution_seaborn, plot_data
15
+
16
+
17
def td3b_finetune(
    args,
    cfg,
    policy_model,
    reward_model,
    mcts=None,
    pretrained=None,
    filename=None,
    prot_name=None,
    eps=1e-5,
    # TD3B-specific arguments
    contrastive_weight=0.1,
    contrastive_margin=1.0,
    contrastive_type='margin',
    embedding_pool_method='mean',
    kl_beta=0.1
):
    """
    TD3B finetuning with combined WDCE + contrastive loss + KL regularization.

    Args:
        args: Configuration arguments
        cfg: Hydra config (currently unused inside this function)
        policy_model: Policy model (MDLM)
        reward_model: Reward scoring functions (TD3BRewardFunction)
        mcts: TD3B_MCTS instance
        pretrained: Pretrained model (for no-MCTS mode)
        filename: Output filename (currently unused inside this function)
        prot_name: Target protein name
        eps: Small epsilon
        contrastive_weight: λ for contrastive loss
        contrastive_margin: Margin for margin-based contrastive loss
        contrastive_type: 'margin' or 'infonce'
        embedding_pool_method: 'mean', 'max', or 'cls'
        kl_beta: β coefficient for KL divergence regularization
    Returns:
        batch_losses: List of training losses
    """
    base_path = args.base_path
    # NOTE(review): dt is computed here but never used below — confirm whether
    # it can be removed.
    dt = (1 - eps) / args.total_num_steps

    if args.no_mcts:
        assert pretrained is not None, "pretrained model is required for no mcts"
    else:
        assert mcts is not None, "mcts is required for mcts"

    # Create reference model (frozen copy of policy model at start of training)
    # Cannot use copy.deepcopy() due to unpicklable objects (file handles, etc.)
    # Instead, create a new model instance and load CLONED state dict
    print("[TD3B] Creating reference model for KL regularization...")

    # Import Diffusion class
    from diffusion import Diffusion

    # Create new instance with same config
    reference_model = Diffusion(
        config=policy_model.config,
        tokenizer=policy_model.tokenizer,
        mode="eval",
        device=policy_model.device if hasattr(policy_model, 'device') else args.device
    )

    # Get the device from policy model
    device = policy_model.device if hasattr(policy_model, 'device') else args.device
    if device is None:
        device = next(policy_model.parameters()).device

    # IMPORTANT: Clone the state dict to create independent tensors
    # This ensures no memory sharing between policy and reference model
    state_dict_copy = {
        key: value.clone().detach()
        for key, value in policy_model.state_dict().items()
    }
    reference_model.load_state_dict(state_dict_copy)

    # Move reference model to same device as policy model
    reference_model = reference_model.to(device)

    # Freeze and set to eval mode
    reference_model.eval()
    for param in reference_model.parameters():
        param.requires_grad = False

    print(f"[TD3B] Reference model frozen with {sum(p.numel() for p in reference_model.parameters())} parameters")
    print(f"[TD3B] Reference model on device: {device}")

    # Verify no parameter sharing
    policy_params = {id(p) for p in policy_model.parameters()}
    ref_params = {id(p) for p in reference_model.parameters()}
    assert len(policy_params.intersection(ref_params)) == 0, \
        "ERROR: Reference model shares parameters with policy model!"
    print("[TD3B] ✓ Verified: No parameter sharing between policy and reference model")

    # Initialize TD3B total loss
    td3b_loss_fn = TD3BTotalLoss(
        contrastive_weight=contrastive_weight,
        contrastive_margin=contrastive_margin,
        contrastive_type=contrastive_type,
        kl_beta=kl_beta,
        reference_model=reference_model
    )

    # Set model to train mode
    policy_model.train()
    torch.set_grad_enabled(True)
    optim = torch.optim.AdamW(policy_model.parameters(), lr=args.learning_rate)

    # Record metrics
    batch_losses = []
    batch_wdce_losses = []
    batch_contrastive_losses = []
    batch_kl_losses = []

    # Initialize saved trajectories (cache reused between resampling steps)
    x_saved, log_rnd_saved, final_rewards_saved = None, None, None
    directional_labels_saved, confidences_saved = None, None

    # Logs
    valid_fraction_log = []
    affinity_log = []
    gated_reward_log = []
    confidence_log = []
    direction_prediction_log = []  # Oracle predictions f_φ ∈ [0, 1]
    consistency_reward_log = []  # d* × (f_φ - 0.5)

    ### Fine-Tuning Loop ###
    pbar = tqdm(range(args.num_epochs))

    for epoch in pbar:
        # NOTE(review): rewards/losses are reset each epoch but never appended
        # to — confirm whether they are leftovers from an earlier version.
        rewards = []
        losses = []

        policy_model.train()

        with torch.no_grad():
            if x_saved is None or epoch % args.resample_every_n_step == 0:
                # Generate trajectories
                if args.no_mcts:
                    # Direct sampling (not typical for TD3B, but keep for compatibility)
                    x_final, log_rnd, final_rewards = policy_model.sample_finetuned_with_rnd(
                        args, reward_model, pretrained
                    )
                    directional_labels = torch.zeros(x_final.size(0), dtype=torch.float32)
                    confidences = torch.ones(x_final.size(0), dtype=torch.float32)
                else:
                    # TD3B MCTS forward pass
                    # For dual-direction mode, sample BOTH directions in the same batch
                    if hasattr(args, 'target_direction') and args.target_direction == 'both':
                        print(f"[Dual-direction] Epoch {epoch}: Sampling BOTH agonist and antagonist binders")

                        # Sample agonist binders (d* = +1)
                        reward_model.target_direction = 1.0
                        if epoch % args.reset_every_n_step == 0:
                            results_agonist = mcts.forward(resetTree=True)
                        else:
                            results_agonist = mcts.forward(resetTree=False)

                        # Sample antagonist binders (d* = -1)
                        reward_model.target_direction = -1.0
                        # Don't reset tree for antagonist to save computation
                        results_antagonist = mcts.forward(resetTree=False)

                        # Unpack both results
                        if len(results_agonist) == 7 and len(results_antagonist) == 7:
                            x_agonist, log_rnd_agonist, rewards_agonist, _, _, labels_agonist, conf_agonist = results_agonist
                            x_antagonist, log_rnd_antagonist, rewards_antagonist, _, _, labels_antagonist, conf_antagonist = results_antagonist

                            # Force labels to be correct (in case oracle is wrong)
                            labels_agonist = torch.ones(x_agonist.size(0), dtype=torch.float32) * 1.0  # +1 for agonist
                            labels_antagonist = torch.ones(x_antagonist.size(0), dtype=torch.float32) * -1.0  # -1 for antagonist

                            # Combine both directions into single batch
                            x_final = torch.cat([x_agonist, x_antagonist], dim=0)
                            log_rnd = torch.cat([log_rnd_agonist, log_rnd_antagonist], dim=0)
                            final_rewards = np.concatenate([rewards_agonist, rewards_antagonist], axis=0)
                            directional_labels = torch.cat([labels_agonist, labels_antagonist], dim=0)
                            confidences = torch.cat([
                                conf_agonist if isinstance(conf_agonist, torch.Tensor) else torch.tensor(conf_agonist),
                                conf_antagonist if isinstance(conf_antagonist, torch.Tensor) else torch.tensor(conf_antagonist)
                            ], dim=0)

                            print(f" → Combined batch: {x_agonist.size(0)} agonists + {x_antagonist.size(0)} antagonists = {x_final.size(0)} total")
                            print(f" → Directional labels: {torch.unique(directional_labels).tolist()} (DIVERSITY CONFIRMED!)")
                        else:
                            raise ValueError("Dual-direction mode requires 7-value return from MCTS")
                    else:
                        # Single-direction mode
                        if epoch % args.reset_every_n_step == 0:
                            results = mcts.forward(resetTree=True)
                        else:
                            results = mcts.forward(resetTree=False)

                        # Unpack results (TD3B version includes directional labels and confidences)
                        if len(results) == 7:
                            x_final, log_rnd, final_rewards, score_vectors, sequences, directional_labels, confidences = results
                            # Convert numpy arrays to tensors immediately for consistency
                            if not isinstance(directional_labels, torch.Tensor):
                                directional_labels = torch.tensor(directional_labels, dtype=torch.float32)
                            if not isinstance(confidences, torch.Tensor):
                                confidences = torch.tensor(confidences, dtype=torch.float32)
                        else:
                            # Fallback for compatibility with base MCTS
                            x_final, log_rnd, final_rewards, score_vectors, sequences = results
                            directional_labels = torch.zeros(x_final.size(0), dtype=torch.float32)
                            confidences = torch.ones(x_final.size(0), dtype=torch.float32)

                # Save for next iteration
                x_saved = x_final
                log_rnd_saved = log_rnd
                final_rewards_saved = final_rewards
                directional_labels_saved = directional_labels
                confidences_saved = confidences
            else:
                # Reuse cached trajectories
                x_final = x_saved
                log_rnd = log_rnd_saved
                final_rewards = final_rewards_saved
                directional_labels = directional_labels_saved
                confidences = confidences_saved

        # Compute WDCE loss
        wdce_loss = loss_wdce(
            policy_model,
            log_rnd,
            x_final,
            num_replicates=args.wdce_num_replicates,
            centering=args.centering
        )

        # Compute KL divergence loss
        # Use a random masking and forward pass for KL computation
        mask_index = policy_model.mask_index
        device = x_final.device

        # Sample random noise level
        lamda = torch.rand(x_final.shape[0], device=device)  # (B,)
        sigma_kl = -torch.log1p(-(1 - eps) * lamda)

        # Apply random masking
        masked_index = torch.rand(*x_final.shape, device=device) < lamda[..., None]  # (B, L)
        perturbed_batch = torch.where(masked_index, mask_index, x_final)
        attn_mask_kl = torch.ones_like(perturbed_batch).to(device)

        # Compute KL loss
        kl_loss = td3b_loss_fn.compute_kl_loss(
            policy_model,
            perturbed_batch,
            attn_mask_kl,
            sigma_kl
        )

        # Extract embeddings for contrastive loss
        # Only compute if we have directional labels
        if directional_labels is not None and len(torch.unique(directional_labels)) > 1:
            # Get device from backbone
            device = policy_model.backbone.device if hasattr(policy_model.backbone, 'device') else x_final.device

            embeddings = extract_embeddings_from_mdlm(
                policy_model,
                x_final.to(device),
                pool_method=embedding_pool_method
            )

            # Move directional labels to same device
            directional_labels = directional_labels.to(embeddings.device)

            # Enable debug mode for first 3 epochs or if loss was zero last epoch
            debug_mode = (epoch < 3) or (epoch > 0 and batch_contrastive_losses and batch_contrastive_losses[-1] < 1e-6)

            # Compute total TD3B loss
            total_loss, loss_dict = td3b_loss_fn.compute_loss(
                wdce_loss,
                embeddings,
                directional_labels,
                kl_loss=kl_loss,  # Pass KL loss
                debug=debug_mode  # Enable debugging when needed
            )
        else:
            # If no directional diversity, skip contrastive loss
            print(f"[WARNING] Epoch {epoch}: No directional diversity! Skipping contrastive loss.")
            print(f" Labels: {directional_labels.cpu().tolist() if directional_labels is not None else 'None'}")
            total_loss = wdce_loss + td3b_loss_fn.kl_beta * kl_loss
            loss_dict = {
                'total_loss': total_loss.item(),
                'wdce_loss': wdce_loss.item(),
                'contrastive_loss': 0.0,
                'kl_loss': kl_loss.item()
            }

        # Gradient descent
        total_loss.backward()

        # Gradient clipping
        if args.grad_clip:
            torch.nn.utils.clip_grad_norm_(policy_model.parameters(), args.gradnorm_clip)

        optim.step()
        optim.zero_grad()

        pbar.set_postfix(
            total_loss=loss_dict['total_loss'],
            wdce=loss_dict['wdce_loss'],
            ctr=loss_dict['contrastive_loss']
        )

        # Evaluation sampling
        x_eval, eval_metrics = policy_model.sample_finetuned_td3b(
            args,
            reward_model,
            batch_size=50,
            dataframe=False
        )

        # Extract metrics (TD3B-specific)
        affinity = eval_metrics.get('affinity', [0])
        gated_reward = eval_metrics.get('gated_reward', [0])
        confidence = eval_metrics.get('confidence', [1])
        valid_fraction = eval_metrics.get('valid_fraction', 0)

        # Extract direction predictions (f_φ ∈ [0, 1])
        direction_predictions = eval_metrics.get('direction_predictions', [0.5])

        # Compute consistency reward: d* × (f_φ - 0.5)
        # Get target direction d* from reward_model
        d_star = reward_model.target_direction  # +1 or -1
        consistency_rewards = [d_star * (f_phi - 0.5) for f_phi in direction_predictions]

        # Append to logs
        affinity_log.append(affinity)
        gated_reward_log.append(gated_reward)
        confidence_log.append(confidence)
        valid_fraction_log.append(valid_fraction)
        direction_prediction_log.append(direction_predictions)
        consistency_reward_log.append(consistency_rewards)

        batch_losses.append(loss_dict['total_loss'])
        batch_wdce_losses.append(loss_dict['wdce_loss'])
        batch_contrastive_losses.append(loss_dict['contrastive_loss'])
        batch_kl_losses.append(loss_dict.get('kl_loss', 0.0))

        # Compute search statistics
        # no_mcts path yields tensors (tensor reductions); MCTS path yields
        # numpy arrays (np reductions).
        if args.no_mcts:
            mean_reward_search = final_rewards.mean().item()
            min_reward_search = final_rewards.min().item()
            max_reward_search = final_rewards.max().item()
            median_reward_search = final_rewards.median().item()
        else:
            mean_reward_search = np.mean(final_rewards)
            min_reward_search = np.min(final_rewards)
            max_reward_search = np.max(final_rewards)
            median_reward_search = np.median(final_rewards)

        # Compute direction oracle and consistency reward statistics
        mean_direction = np.mean(direction_predictions) if len(direction_predictions) > 0 else 0.5
        std_direction = np.std(direction_predictions) if len(direction_predictions) > 0 else 0.0
        mean_consistency = np.mean(consistency_rewards) if len(consistency_rewards) > 0 else 0.0
        std_consistency = np.std(consistency_rewards) if len(consistency_rewards) > 0 else 0.0

        print(
            f"epoch {epoch} | "
            f"affinity {np.mean(affinity):.4f} | "
            f"gated_reward {np.mean(gated_reward):.4f} | "
            f"confidence {np.mean(confidence):.4f} | "
            f"valid_frac {valid_fraction:.4f} | "
            f"direction_oracle {mean_direction:.4f}±{std_direction:.4f} | "
            f"consistency_reward {mean_consistency:.4f}±{std_consistency:.4f} | "
            f"total_loss {loss_dict['total_loss']:.4f} | "
            f"wdce_loss {loss_dict['wdce_loss']:.4f} | "
            f"contrastive_loss {loss_dict['contrastive_loss']:.4f} | "
            f"kl_loss {loss_dict.get('kl_loss', 0.0):.4f}"
        )

        # W&B logging
        wandb.log({
            "epoch": epoch,
            "affinity": np.mean(affinity),
            "gated_reward": np.mean(gated_reward),
            "confidence": np.mean(confidence),
            "valid_fraction": valid_fraction,
            "direction_oracle/mean": mean_direction,
            "direction_oracle/std": std_direction,
            "consistency_reward/mean": mean_consistency,
            "consistency_reward/std": std_consistency,
            "total_loss": loss_dict['total_loss'],
            "wdce_loss": loss_dict['wdce_loss'],
            "contrastive_loss": loss_dict['contrastive_loss'],
            "kl_loss": loss_dict.get('kl_loss', 0.0),
            "mean_reward_search": mean_reward_search,
            "min_reward_search": min_reward_search,
            "max_reward_search": max_reward_search,
            "median_reward_search": median_reward_search
        })

        # Save checkpoint
        if (epoch + 1) % args.save_every_n_epochs == 0:
            model_path = os.path.join(args.save_path, f'model_{epoch}.ckpt')
            torch.save(policy_model.state_dict(), model_path)
            print(f"model saved at epoch {epoch}")

    ### End of Fine-Tuning Loop ###

    wandb.finish()

    # Save logs and plots
    plot_path = f'{base_path}/TR2-D2/tr2d2-pep/results/{args.run_name}'
    os.makedirs(plot_path, exist_ok=True)
    output_log_path = f'{base_path}/TR2-D2/tr2d2-pep/results/{args.run_name}/log_(unknown).csv'
    save_td3b_logs_to_file(
        valid_fraction_log,
        affinity_log,
        gated_reward_log,
        confidence_log,
        direction_prediction_log,
        consistency_reward_log,
        output_log_path
    )

    plot_data(valid_fraction_log,
              save_path=f'{base_path}/TR2-D2/tr2d2-pep/results/{args.run_name}/valid_(unknown).png')

    plot_data_with_distribution_seaborn(
        log1=affinity_log,
        save_path=f'{base_path}/TR2-D2/tr2d2-pep/results/{args.run_name}/affinity_(unknown).png',
        label1=f"Average Affinity to {prot_name}",
        title=f"Average Affinity to {prot_name} Over Iterations"
    )

    plot_data_with_distribution_seaborn(
        log1=gated_reward_log,
        save_path=f'{base_path}/TR2-D2/tr2d2-pep/results/{args.run_name}/gated_reward_(unknown).png',
        label1="Average Gated Reward",
        title="Average Gated Reward Over Iterations"
    )

    plot_data_with_distribution_seaborn(
        log1=confidence_log,
        save_path=f'{base_path}/TR2-D2/tr2d2-pep/results/{args.run_name}/confidence_(unknown).png',
        label1="Average Confidence",
        title="Average Confidence Over Iterations"
    )

    # Final evaluation
    x_eval, eval_metrics, df = policy_model.sample_finetuned_td3b(
        args,
        reward_model,
        batch_size=200,
        dataframe=True
    )
    df.to_csv(f'{base_path}/TR2-D2/tr2d2-pep/results/{args.run_name}/{prot_name}_generation_results.csv', index=False)

    return batch_losses
468
+
469
+
470
def save_td3b_logs_to_file(valid_fraction_log, affinity_log, gated_reward_log, confidence_log,
                           direction_prediction_log, consistency_reward_log, output_path):
    """Persist TD3B per-iteration training logs as a CSV file.

    Parameters:
        valid_fraction_log (list): Log of valid fractions over iterations.
        affinity_log (list): Log of binding affinity over iterations.
        gated_reward_log (list): Log of gated rewards over iterations.
        confidence_log (list): Log of confidence scores over iterations.
        direction_prediction_log (list): Log of direction oracle predictions over iterations.
        consistency_reward_log (list): Log of consistency rewards over iterations.
        output_path (str): Path to save the log CSV file.
    """
    # Ensure the destination directory exists before writing.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    n_iterations = len(valid_fraction_log)
    frame = pd.DataFrame({
        "Iteration": list(range(1, n_iterations + 1)),
        "Valid Fraction": valid_fraction_log,
        "Binding Affinity": affinity_log,
        "Gated Reward": gated_reward_log,
        "Confidence": confidence_log,
        "Direction Oracle": direction_prediction_log,
        "Consistency Reward": consistency_reward_log
    })

    frame.to_csv(output_path, index=False)
    print(f"Logs saved to {output_path}")
502
+
503
+
504
# Add sampling method to diffusion model (monkey patch or extend)
def add_td3b_sampling_to_model(model):
    """
    Adds TD3B-specific sampling method to the model.
    This is a helper function to extend the existing model.

    Binds ``sample_finetuned_td3b`` onto ``model`` via descriptor protocol
    (``__get__``) and returns the same model instance.
    """
    def sample_finetuned_td3b(self, args, reward_model, batch_size=50, dataframe=False):
        """
        TD3B-specific sampling that returns directional metrics.

        Runs the reverse diffusion for ``args.total_num_steps`` steps, filters
        decoded sequences through PeptideAnalyzer, scores the valid ones with
        ``reward_model``, and resamples ``batch_size`` trajectories with
        softmax(reward / alpha) weights.

        Returns:
            (x_resampled, eval_metrics) or, when ``dataframe`` is True,
            (x_resampled, eval_metrics, df).
        """
        self.backbone.eval()
        self.noise.eval()

        if batch_size is None:
            batch_size = args.batch_size

        eps = getattr(args, "sampling_eps", 1e-5)
        num_steps = args.total_num_steps
        x_rollout = self.sample_prior(
            batch_size,
            args.seq_length).to(self.device, dtype=torch.long)

        # Evenly spaced times from 1 down to eps; one reverse step per interval.
        timesteps = torch.linspace(1, eps, num_steps + 1, device=self.device)
        dt = torch.tensor((1 - eps) / num_steps, device=self.device)

        for i in range(num_steps):
            t = timesteps[i] * torch.ones(x_rollout.shape[0], 1, device=self.device)
            log_p, x_next = self.single_reverse_step(x_rollout, t=t, dt=dt)
            x_rollout = x_next.to(self.device)

        # Final cleanup: if any mask tokens survive the rollout, run one
        # noise-removal step. NOTE(review): placement relative to the loop was
        # inferred from the use of the final `t`; confirm against the base
        # TR2-D2 sampler.
        mask_positions = (x_rollout == self.mask_index)
        if mask_positions.any().item():
            log_p, x_next = self.single_noise_removal(x_rollout, t=t, dt=dt)
            x_rollout = x_next.to(self.device)

        # Convert x to sequences to get valid ones
        from utils.app import PeptideAnalyzer
        analyzer = PeptideAnalyzer()
        sequences = self.tokenizer.batch_decode(x_rollout)
        valid_mask = torch.tensor([analyzer.is_peptide(seq) for seq in sequences], device=self.device)
        valid_sequences = [seq for seq, keep in zip(sequences, valid_mask.tolist()) if keep]
        valid_x_final = x_rollout[valid_mask] if valid_mask.any().item() else torch.empty(0, device=self.device)
        valid_fraction = len(valid_sequences) / batch_size

        if len(valid_sequences) > 0:
            result = reward_model(valid_sequences)
            # Reward model may return (rewards, info dict) or a bare array.
            if isinstance(result, tuple):
                total_rewards, info = result
                affinity = np.asarray(info.get('affinities', total_rewards))
                confidence = np.asarray(info.get('confidences', np.ones_like(affinity)))
                direction_predictions = np.asarray(info.get('directions', np.zeros_like(affinity)))
            else:
                total_rewards = np.asarray(result)
                if total_rewards.ndim > 1:
                    affinity = total_rewards[:, 0]
                else:
                    affinity = total_rewards
                confidence = np.ones_like(affinity)
                direction_predictions = np.zeros_like(affinity)

            # Reward-weighted resampling (temperature alpha, clamped > 0).
            rewards_t = torch.as_tensor(total_rewards, dtype=torch.float32, device=self.device)
            alpha = max(float(getattr(args, "alpha", 0.1)), 1e-6)
            weights = torch.softmax(rewards_t / alpha, dim=0)
            idx = torch.multinomial(weights, num_samples=batch_size, replacement=True)

            idx_np = idx.detach().cpu().numpy()
            x_resampled = valid_x_final[idx]
            sequences = [valid_sequences[i] for i in idx_np]
            total_rewards = total_rewards[idx_np]
            affinity = affinity[idx_np]
            confidence = confidence[idx_np]
            direction_predictions = direction_predictions[idx_np]
        else:
            # No valid peptides: return the raw rollout and empty metrics.
            x_resampled = x_rollout
            total_rewards = np.array([])
            affinity = np.array([])
            confidence = np.array([])
            direction_predictions = np.array([])

        eval_metrics = {
            'affinity': affinity,
            'gated_reward': total_rewards,
            'confidence': confidence,
            'direction_predictions': direction_predictions,
            'valid_fraction': valid_fraction
        }

        if dataframe:
            df = pd.DataFrame({
                'sequence': sequences if len(total_rewards) else [],
                'affinity': affinity,
                'gated_reward': total_rewards,
                'confidence': confidence
            })
            return x_resampled, eval_metrics, df
        else:
            return x_resampled, eval_metrics

    # Attach method to model
    model.sample_finetuned_td3b = sample_finetuned_td3b.__get__(model, type(model))
    return model
td3b/td3b_losses.py ADDED
@@ -0,0 +1,527 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ TD3B Loss Functions
3
+ Implements contrastive loss for separating agonist/antagonist embeddings.
4
+ """
5
+
6
+ import torch
7
+ import torch.nn as nn
8
+ import torch.nn.functional as F
9
+ from typing import Optional, Tuple
10
+
11
+
12
class ContrastiveLoss(nn.Module):
    """
    Margin-based contrastive loss separating agonist and antagonist embeddings.

    Pairs that share a class label are pulled together (squared-distance term),
    while pairs with opposite labels are pushed at least ``margin`` apart
    (squared hinge term):

        L_ctr = (1 - y_ij) * 0.5 * d^2 + y_ij * 0.5 * max(0, margin - d)^2

    where d = ||emb_i - emb_j||_2 (or cosine distance) and y_ij = 1 for
    dissimilar pairs, 0 for similar ones.
    """

    def __init__(self, margin: float = 1.0, distance_metric: str = 'euclidean', adaptive_margin: bool = False):
        """
        Args:
            margin: Minimum separation enforced between dissimilar pairs (base margin).
            distance_metric: 'euclidean' or 'cosine'.
            adaptive_margin: When True, grow the margin from the observed
                dissimilar-pair distances instead of keeping it fixed.
        """
        super().__init__()
        self.base_margin = margin
        self.distance_metric = distance_metric
        self.adaptive_margin = adaptive_margin

    def compute_distance(self, emb1: torch.Tensor, emb2: torch.Tensor) -> torch.Tensor:
        """
        Element-wise distance between two aligned batches of embeddings.

        Args:
            emb1: (batch_size, embedding_dim)
            emb2: (batch_size, embedding_dim)
        Returns:
            distances: (batch_size,)
        """
        if self.distance_metric == 'euclidean':
            # L2 distance per row.
            return torch.norm(emb1 - emb2, p=2, dim=-1)
        if self.distance_metric == 'cosine':
            # Cosine distance = 1 - cosine similarity.
            return 1.0 - F.cosine_similarity(emb1, emb2, dim=-1)
        raise ValueError(f"Unknown distance metric: {self.distance_metric}")

    def forward(
        self,
        embeddings: torch.Tensor,
        labels: torch.Tensor,
        confidences: Optional[torch.Tensor] = None,
        debug: bool = False
    ) -> torch.Tensor:
        """
        Contrastive loss over all ordered off-diagonal pairs in the batch.

        Args:
            embeddings: (batch_size, embedding_dim) sequence embeddings.
            labels: (batch_size,) directional labels in {-1, +1}
                (+1 = agonist, -1 = antagonist).
            confidences: optional (batch_size,) oracle confidence scores; any
                pair whose confidence product is <= 0 is excluded.
            debug: print detailed diagnostics when True.
        Returns:
            Scalar contrastive loss (0 for degenerate batches).
        """
        n = embeddings.size(0)
        if n < 2:
            if debug:
                print(f"[ContrastiveLoss DEBUG] Batch size {n} < 2, returning 0 loss")
            return torch.tensor(0.0, device=embeddings.device)

        if confidences is not None:
            if torch.is_tensor(confidences):
                confidences = confidences.to(embeddings.device)
            else:
                confidences = torch.as_tensor(confidences, device=embeddings.device)
            confidences = confidences.view(-1)
            if confidences.numel() != n:
                raise ValueError(
                    f"Confidences size {confidences.numel()} does not match batch size {n}"
                )

        # Full all-pairs distance matrix (B, B).
        if self.distance_metric == 'euclidean':
            dist_mat = torch.cdist(embeddings, embeddings, p=2)
        elif self.distance_metric == 'cosine':
            unit = F.normalize(embeddings, p=2, dim=-1)
            dist_mat = 1.0 - unit @ unit.T
        else:
            raise ValueError(f"Unknown distance metric: {self.distance_metric}")

        # Pair labels: product > 0 means same class, < 0 means different class.
        labels = labels.view(-1)
        sign_prod = labels.unsqueeze(1) * labels.unsqueeze(0)
        dissimilar = sign_prod < 0

        # Exclude self-pairs on the diagonal.
        diag = torch.eye(n, device=embeddings.device, dtype=torch.bool)
        pos_mask = (~dissimilar) & ~diag
        neg_mask = dissimilar & ~diag

        # Drop pairs whose confidence product is non-positive.
        conf_mask = None
        if confidences is not None:
            conf_mask = (confidences.unsqueeze(0) * confidences.unsqueeze(1)) > 0
            pos_mask = pos_mask & conf_mask
            neg_mask = neg_mask & conf_mask

        # Optionally stretch the margin to 150% of the mean dissimilar distance
        # so the hinge term always has room to optimize.
        if self.adaptive_margin and neg_mask.any():
            margin = max(self.base_margin, 1.5 * dist_mat[neg_mask].mean().item())
        else:
            margin = self.base_margin

        pos_count = pos_mask.sum()
        neg_count = neg_mask.sum()
        if (pos_count + neg_count).item() == 0:
            if debug:
                print("[ContrastiveLoss DEBUG] No valid pairs after filtering, returning 0 loss")
            return torch.tensor(0.0, device=embeddings.device)

        # Similar pairs: mean squared distance. Dissimilar pairs: mean squared
        # hinge on (margin - d). The 1e-8 guards the empty-set division.
        pos_loss = dist_mat[pos_mask].pow(2).sum() / (pos_count + 1e-8)
        neg_loss = torch.clamp(margin - dist_mat[neg_mask], min=0.0).pow(2).sum() / (neg_count + 1e-8)
        loss = pos_loss + neg_loss

        if debug:
            print(f"\n[ContrastiveLoss DEBUG]")
            print(f"  Batch size: {n}")
            print(f"  Labels: {labels.cpu().tolist()}")
            print(f"  Unique labels: {torch.unique(labels).cpu().tolist()}")
            print(f"  Embedding shape: {embeddings.shape}")
            print(f"  Embedding norm (mean): {embeddings.norm(dim=-1).mean().item():.4f}")
            print(f"  Embedding norm (std): {embeddings.norm(dim=-1).std().item():.4f}")
            valid_mask = pos_mask | neg_mask
            if valid_mask.any():
                valid_dists = dist_mat[valid_mask]
                print(f"  Distance stats (valid pairs): mean={valid_dists.mean().item():.4f} "
                      f"min={valid_dists.min().item():.4f} max={valid_dists.max().item():.4f}")
            if self.adaptive_margin and neg_mask.any():
                print(f"  Margin: {margin:.4f} (adaptive, base={self.base_margin})")
            else:
                print(f"  Margin: {margin:.4f} (fixed)")
            print(f"  Num similar pairs: {pos_count.item():.0f}")
            print(f"  Num dissimilar pairs: {neg_count.item():.0f}")
            if conf_mask is not None:
                print(f"  Confidence-passing pairs: {conf_mask.sum().item():.0f}")
            print(f"  Similar loss (mean): {pos_loss.item():.4f}")
            print(f"  Dissimilar loss (mean): {neg_loss.item():.4f}")
            print(f"  Total loss: {loss.item():.4f}")

            # Show which dissimilar pairs have margin violations
            margin_violations = (dist_mat < margin) & neg_mask
            if margin_violations.sum() > 0:
                print(f"  Margin violations: {margin_violations.sum().item():.0f} dissimilar pairs have distance < margin")
            else:
                print(f"  Margin violations: 0 (all dissimilar pairs are already separated)")

        return loss
192
+
193
+
194
class InfoNCELoss(nn.Module):
    """
    InfoNCE-style contrastive loss (as in SimCLR / CLIP) over direction classes.

    Each agonist anchor is pulled toward other agonists and pushed away from
    antagonists, and vice versa. Anchors without at least one positive and one
    negative partner are skipped.
    """

    def __init__(self, temperature: float = 0.1):
        """
        Args:
            temperature: Temperature parameter for softmax; smaller values
                sharpen the contrast between positives and negatives.
        """
        super().__init__()
        self.temperature = temperature

    def forward(
        self,
        embeddings: torch.Tensor,
        labels: torch.Tensor,
        confidences: Optional[torch.Tensor] = None,
        debug: bool = False
    ) -> torch.Tensor:
        """
        Compute InfoNCE loss.

        Args:
            embeddings: (batch_size, embedding_dim)
            labels: (batch_size,) in {-1, +1}
            confidences: (batch_size,) oracle confidence scores; pairs with
                product <= 0 are masked out
            debug: Unused (kept for API compatibility)
        Returns:
            loss: scalar
        """
        batch_size = embeddings.size(0)
        if confidences is not None:
            if not torch.is_tensor(confidences):
                confidences = torch.as_tensor(confidences, device=embeddings.device)
            else:
                confidences = confidences.to(embeddings.device)
            confidences = confidences.view(-1)
            if confidences.numel() != batch_size:
                raise ValueError(
                    f"Confidences size {confidences.numel()} does not match batch size {batch_size}"
                )
        if batch_size < 2:
            return torch.tensor(0.0, device=embeddings.device)

        # Temperature-scaled cosine similarities.
        embeddings = F.normalize(embeddings, p=2, dim=-1)  # (B, D)
        similarity = torch.matmul(embeddings, embeddings.T) / self.temperature  # (B, B)

        # Positive = same class, negative = different class.
        labels_expanded = labels.unsqueeze(1)  # (B, 1)
        label_product = labels_expanded * labels_expanded.T  # (B, B)
        positive_mask = (label_product > 0)
        negative_mask = (label_product < 0)

        # Remove self-similarity from the positives.
        positive_mask.fill_diagonal_(0)

        if confidences is not None:
            conf_product = confidences.unsqueeze(0) * confidences.unsqueeze(1)
            conf_mask = conf_product > 0
            positive_mask = positive_mask & conf_mask
            negative_mask = negative_mask & conf_mask

        # Per-anchor loss: -log( sum(exp(pos)) / sum(exp(pos) + exp(neg)) ).
        losses = []
        for i in range(batch_size):
            pos_sims = similarity[i][positive_mask[i]]  # (num_pos,)
            neg_sims = similarity[i][negative_mask[i]]  # (num_neg,)

            # Skip anchors without at least one positive and one negative.
            if pos_sims.numel() == 0 or neg_sims.numel() == 0:
                continue

            # BUG FIX: the previous code exponentiated the raw similarities
            # despite its "LogSumExp for numerical stability" comment; for
            # small temperatures exp(sim / tau) overflows to inf and the loss
            # becomes NaN. torch.logsumexp computes the same quantity stably:
            # -log(sum(exp(pos)) / sum(exp(all))) = logsumexp(all) - logsumexp(pos)
            all_sims = torch.cat([pos_sims, neg_sims])
            loss_i = torch.logsumexp(all_sims, dim=0) - torch.logsumexp(pos_sims, dim=0)
            losses.append(loss_i)

        if len(losses) == 0:
            return torch.tensor(0.0, device=embeddings.device)

        return torch.stack(losses).mean()
293
+
294
+
295
class TD3BTotalLoss:
    """
    Aggregate TD3B objective: L_total = L_WDCE + lambda * L_ctr + beta * L_KL.

    Components:
    - L_WDCE: weighted denoising cross-entropy (computed upstream, TR2-D2)
    - L_ctr: contrastive separation of agonist/antagonist embeddings
    - L_KL: KL divergence between the policy and a frozen reference model
    """

    def __init__(
        self,
        contrastive_weight: float = 0.1,
        contrastive_margin: float = 1.0,
        contrastive_type: str = 'margin',  # 'margin' or 'infonce'
        kl_beta: float = 0.1,  # β coefficient for KL divergence
        reference_model: Optional[nn.Module] = None,
        adaptive_margin: bool = True  # Enable adaptive margin by default
    ):
        """
        Args:
            contrastive_weight: λ multiplier on the contrastive term.
            contrastive_margin: Base margin for the margin-based loss.
            contrastive_type: 'margin' or 'infonce'.
            kl_beta: β multiplier on the KL regularizer.
            reference_model: Frozen copy of the pretrained model, or None to
                disable the KL term.
            adaptive_margin: Let the margin track observed dissimilar distances.
        """
        self.contrastive_weight = contrastive_weight
        self.kl_beta = kl_beta
        self.reference_model = reference_model

        # The reference must never be updated: switch to eval, freeze, verify.
        if self.reference_model is not None:
            self.reference_model.eval()
            for param in self.reference_model.parameters():
                param.requires_grad = False
            assert all(not p.requires_grad for p in self.reference_model.parameters()), \
                "ERROR: Reference model has parameters with requires_grad=True!"

        if contrastive_type == 'margin':
            self.contrastive_loss = ContrastiveLoss(
                margin=contrastive_margin,
                distance_metric='euclidean',
                adaptive_margin=adaptive_margin
            )
        elif contrastive_type == 'infonce':
            self.contrastive_loss = InfoNCELoss(temperature=0.1)
        else:
            raise ValueError(f"Unknown contrastive type: {contrastive_type}")

    def compute_kl_categorical(
        self,
        log_p: torch.Tensor,
        log_ref_p: torch.Tensor
    ) -> torch.Tensor:
        """
        Per-position KL divergence between categorical distributions.

        KL(P || Q) = sum_x P(x) * (log P(x) - log Q(x))

        Args:
            log_p: (B, L, Vocab) log-probabilities from the policy model.
            log_ref_p: (B, L, Vocab) log-probabilities from the reference model.
        Returns:
            (B, L) KL divergence summed over the vocabulary axis.
        """
        contrib = torch.exp(log_p) * (log_p - log_ref_p)  # (B, L, Vocab)
        # 0 * log(0) terms surface as NaN/Inf at -inf log-probs; they should
        # contribute zero, so zero out non-finite entries.
        contrib = torch.where(
            torch.isfinite(contrib),
            contrib,
            torch.zeros_like(contrib)
        )
        return contrib.sum(dim=-1)  # (B, L)

    def compute_kl_loss(
        self,
        policy_model: nn.Module,
        sequences: torch.Tensor,
        attn_mask: torch.Tensor,
        sigma: torch.Tensor
    ) -> torch.Tensor:
        """
        Masked-mean KL divergence between policy and frozen reference outputs.

        Args:
            policy_model: Current policy model (being trained).
            sequences: (B, L) input sequences.
            attn_mask: (B, L) attention mask (1 = real token).
            sigma: (B,) noise schedule values.
        Returns:
            Scalar KL loss (0 when no reference model is configured).
        """
        if self.reference_model is None:
            return torch.tensor(0.0, device=sequences.device)

        # Invariant: the reference stays in eval mode for its whole lifetime.
        assert not self.reference_model.training, \
            "ERROR: Reference model is in training mode! It should always be in eval mode."

        # Policy logits need gradients; the reference forward does not.
        # (Policy logits are recomputed here even if WDCE already ran a
        # forward pass — kept for interface simplicity.)
        policy_logits = policy_model(sequences, attn_mask=attn_mask, sigma=sigma)  # (B, L, Vocab)
        with torch.no_grad():
            ref_logits = self.reference_model(sequences, attn_mask=attn_mask, sigma=sigma)  # (B, L, Vocab)

        kl_per_position = self.compute_kl_categorical(
            F.log_softmax(policy_logits, dim=-1),
            F.log_softmax(ref_logits, dim=-1)
        )  # (B, L)

        # Average only over non-padding positions.
        valid = attn_mask.float()
        return (kl_per_position * valid).sum() / (valid.sum() + 1e-8)

    def compute_loss(
        self,
        wdce_loss: torch.Tensor,
        embeddings: torch.Tensor,
        directional_labels: torch.Tensor,
        confidences: Optional[torch.Tensor] = None,
        kl_loss: Optional[torch.Tensor] = None,
        debug: bool = False
    ) -> Tuple[torch.Tensor, dict]:
        """
        Combine the precomputed WDCE loss with contrastive and KL terms.

        Args:
            wdce_loss: Precomputed WDCE loss (scalar tensor).
            embeddings: (batch_size, embedding_dim) sequence embeddings.
            directional_labels: (batch_size,) labels in {-1, +1}.
            confidences: (batch_size,) oracle confidences; pairs with
                product <= 0 are masked out in the contrastive term.
            kl_loss: Precomputed KL loss (optional; treated as 0 when None).
            debug: Forwarded to the contrastive loss for diagnostics.
        Returns:
            (total_loss, loss_dict) where loss_dict holds float components.
        """
        ctr_loss = self.contrastive_loss(
            embeddings,
            directional_labels,
            confidences=confidences,
            debug=debug
        )

        if kl_loss is None:
            kl_loss = torch.tensor(0.0, device=embeddings.device)

        # L_total = L_WDCE + λ * L_ctr + β * L_KL
        total_loss = wdce_loss + self.contrastive_weight * ctr_loss + self.kl_beta * kl_loss

        loss_dict = {
            'total_loss': total_loss.item(),
            'wdce_loss': wdce_loss.item(),
            'contrastive_loss': ctr_loss.item(),
            'kl_loss': kl_loss.item() if isinstance(kl_loss, torch.Tensor) else kl_loss
        }

        return total_loss, loss_dict
478
+
479
+
480
def extract_embeddings_from_mdlm(
    model,
    sequences: torch.Tensor,
    pool_method: str = 'mean'
) -> torch.Tensor:
    """
    Pool per-token hidden states from the MDLM (Roformer) backbone into one
    embedding per sequence.

    Args:
        model: MDLM wrapper exposing ``model.backbone.model`` (a
            RoFormerForMaskedLM-style module accepting ``output_hidden_states``).
        sequences: (batch_size, seq_len) token ids; token id 0 is treated as
            padding when building the attention mask.
        pool_method: 'mean' (padding-aware), 'max', or 'cls' (first token).
    Returns:
        embeddings: (batch_size, hidden_dim)
    """
    # Padding positions (token id 0) are masked from attention and mean pooling.
    attn_mask = (sequences != 0).long()  # (B, L)

    # No torch.no_grad() here on purpose: the pooled embeddings feed the
    # contrastive loss, so gradients must flow through this forward pass.
    outputs = model.backbone.model(
        input_ids=sequences,
        attention_mask=attn_mask,
        output_hidden_states=True,
        return_dict=True
    )

    # hidden_states = (embedding output, layer 1, ..., layer N); use the last.
    token_states = outputs.hidden_states[-1]  # (B, L, D)

    if pool_method == 'mean':
        # Padding-aware mean over the sequence dimension.
        weights = attn_mask.float().unsqueeze(-1)  # (B, L, 1)
        return (token_states * weights).sum(dim=1) / (weights.sum(dim=1) + 1e-8)
    if pool_method == 'max':
        # NOTE(review): max pooling also sees padding positions — confirm that
        # hidden states at padding tokens cannot dominate the maximum.
        return token_states.max(dim=1)[0]
    if pool_method == 'cls':
        # First-token (CLS-style) embedding.
        return token_states[:, 0, :]
    raise ValueError(f"Unknown pool method: {pool_method}")
td3b/td3b_mcts.py ADDED
@@ -0,0 +1,307 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ TD3B-specific MCTS modifications.
3
+ Extends the base MCTS to support directional rewards and confidence weighting.
4
+ """
5
+
6
+ import numpy as np
7
+ import torch
8
+ from peptide_mcts import MCTS as BaseMCTS
9
+ from .td3b_scoring import TD3BRewardFunction, TD3BConfidenceWeighting
10
+
11
+
12
class TD3B_MCTS(BaseMCTS):
    """
    TD3B version of MCTS that:
    1. Uses gated directional rewards instead of multi-objective scalarization
    2. Stores directional labels and confidence scores in the buffer
    3. Applies confidence-weighted importance sampling
    """

    def __init__(
        self,
        args,
        diffusion_model,
        td3b_reward_function: TD3BRewardFunction,
        confidence_weighting: TD3BConfidenceWeighting,
        mask_index: int,
        buffer_size: int = 100,
        noise=None,
        tokenizer=None
    ):
        """
        Args:
            args: Configuration arguments
            diffusion_model: MDLM model for sampling
            td3b_reward_function: TD3BRewardFunction instance
            confidence_weighting: TD3BConfidenceWeighting instance
            mask_index: Token ID for masked positions
            buffer_size: Maximum buffer size
            noise: Noise schedule
            tokenizer: Peptide tokenizer
        """
        # Initialize base MCTS (will set self.rewardFunc later)
        # Note: base MCTS expects 'policy_model' not 'diffusion_model'
        # Create a minimal config object for base MCTS
        # NOTE(review): the base class presumably only reads config.noise from
        # this stub — confirm BaseMCTS does not access other config fields.
        class MinimalConfig:
            def __init__(self):
                self.noise = type('obj', (object,), {
                    'type': 'loglinear',
                    'sigma_min': 1e-4,
                    'sigma_max': 20
                })()
        config = MinimalConfig()

        super().__init__(
            args=args,
            config=config,
            policy_model=diffusion_model,
            pretrained=diffusion_model, # Use same model
            score_func_names=['affinity', 'gated_reward', 'placeholder1', 'placeholder2', 'placeholder3'] # 5 objectives
        )

        # Set TD3B-specific attributes
        self.td3b_reward_func = td3b_reward_function
        self.confidence_weighting = confidence_weighting
        self.mask_index = mask_index
        self.buffer_size = buffer_size
        self.noise = noise
        self.tokenizer = tokenizer if tokenizer is not None else diffusion_model.tokenizer

        # Override num_obj to ensure it's 5 (matching our padded rewards)
        self.num_obj = 5

        # Override rewardFunc for compatibility
        self.rewardFunc = self._td3b_reward_wrapper

    def _td3b_reward_wrapper(self, input_seqs):
        """
        Wrapper to make TD3BRewardFunction compatible with existing MCTS interface.
        Returns (N, 5) array to match base MCTS expectations.
        The 5 columns are: [affinity, gated_reward, 0, 0, 0] (padding last 3)
        """
        # NOTE(review): numpy is already imported at module scope; this local
        # import is redundant (but harmless).
        import numpy as np
        total_rewards, info = self.td3b_reward_func(input_seqs)
        # info contains: 'affinities', 'confidences', 'score_vectors'

        # Store confidences for later use (attach to self for access in updateBuffer)
        self._last_confidences = info['confidences']

        # Pad score_vectors from (N, 2) to (N, 5) to match base MCTS
        # Original columns: [affinity, gated_reward]
        # Padded to: [affinity, gated_reward, 0, 0, 0]
        score_vectors = info['score_vectors'] # (N, 2)
        padded = np.zeros((score_vectors.shape[0], 5))
        padded[:, :2] = score_vectors # Copy affinity and gated_reward

        return padded

    def updateBuffer(self, x_final, log_rnd, score_vectors, childSequences):
        """
        TD3B version: stores directional labels and confidence scores.

        Args:
            x_final: (B, L) final sequence tokens
            log_rnd: (B,) log importance weights (trajectory-level)
            score_vectors: (B, K) score arrays
            childSequences: List of B SMILES strings
        Returns:
            traj_log_rnds: (B,) updated log importance weights
            scalar_rewards: (B,) scalar rewards
        """
        B = x_final.shape[0]
        traj_log_rnds, scalar_rewards = [], []

        # Get confidences from last reward computation
        # (set by _td3b_reward_wrapper; defaults to all-ones when absent)
        confidences = getattr(self, '_last_confidences', np.ones(B))

        for i in range(B):
            sv = np.asarray(score_vectors[i], dtype=float) # [affinity, gated_reward]
            confidence = confidences[i]

            # For TD3B, the "scalar reward" is the gated reward (second element)
            scalar_reward = float(sv[1]) # gated_reward = g_ψ · (d* · sigmoid(f_φ-0.5)/α)

            # Compute confidence-weighted importance weight
            # w(y) = κ(y) · exp(S_total / α)
            # In log space: log w(y) = log κ(y) + S_total / α
            # min_confidence floors the log to keep it finite at confidence 0.
            log_confidence = np.log(np.maximum(confidence, self.confidence_weighting.min_confidence))
            traj_log_rnd = log_rnd[i] + (scalar_reward / self.args.alpha) + log_confidence

            # Infer directional label from oracle (sign of gated reward)
            # If gated_reward > 0, peptide is predicted as target direction
            # This is approximate; in practice you might want to query f_φ directly
            directional_label = np.sign(scalar_reward) if scalar_reward != 0 else 0.0

            item = {
                "x_final": x_final[i].clone(),
                "log_rnd": traj_log_rnd.clone() if isinstance(traj_log_rnd, torch.Tensor) else torch.tensor(traj_log_rnd),
                "final_reward": scalar_reward,
                "score_vector": sv.copy(),
                "seq": childSequences[i],
                # TD3B-specific additions
                "directional_label": directional_label,
                "confidence": confidence,
            }

            # Pareto dominance filtering (same as base class)
            from peptide_mcts import dominated_by, dominates

            # NOTE(review): a dominated candidate `continue`s before the appends
            # at the bottom of this loop, so the returned arrays can hold fewer
            # than B entries despite the documented (B,) shapes — confirm that
            # callers expect only the retained trajectories.
            if any(dominated_by(sv, bi["score_vector"]) for bi in self.buffer):
                self._debug_buffer_decision(sv, "rejected_dominated")
                continue

            # Remove dominated items
            keep = []
            for bi in self.buffer:
                if not dominates(sv, bi["score_vector"]):
                    keep.append(bi)
            self.buffer = keep

            # Insert with capacity constraint
            if len(self.buffer) < self.buffer_size:
                self.buffer.append(item)
            else:
                # Replace worst item
                worst_i = int(np.argmin([np.sum(bi["score_vector"]) for bi in self.buffer]))
                self.buffer[worst_i] = item

            self._debug_buffer_decision(sv, "inserted", {"new_len": len(self.buffer)})

            traj_log_rnds.append(traj_log_rnd)
            scalar_rewards.append(scalar_reward)

        # Stack per-trajectory weights; empty tensor when every item was rejected.
        traj_log_rnds = torch.stack([torch.tensor(x) if not isinstance(x, torch.Tensor) else x for x in traj_log_rnds], dim=0) if traj_log_rnds else torch.empty(0)
        scalar_rewards = np.asarray(scalar_rewards, dtype=float)
        return traj_log_rnds, scalar_rewards

    def forward(self, resetTree=False):
        """
        TD3B version of forward that returns 7 values.

        Returns:
            x_final: (N, L) sequence tokens
            log_rnd: (N,) log importance weights
            final_rewards: (N,) scalar rewards
            score_vectors: (N, K) score arrays
            sequences: List of N SMILES strings
            directional_labels: (N,) directional labels
            confidences: (N,) confidence scores
        """
        self.reset(resetTree)

        # Run select/expand iterations until the configured budget is spent;
        # select, expand, reset, timer and num_iter come from BaseMCTS.
        while (self.iter_num < self.num_iter):
            self.iter_num += 1

            # traverse the tree form the root node until a leaf node
            with self.timer.section("select"):
                leafNode, _ = self.select(self.rootNode)

            # expand leaf node into num_children partially unmasked sequences at the next timestep
            with self.timer.section("expand"):
                self.expand(leafNode)

        final_x, log_rnd, final_rewards, score_vectors, sequences, directional_labels, confidences = self.consolidateBuffer()

        rows = self.timer.summary()
        print("\n=== Timing summary (by total time) ===")
        for name, cnt, total, mean, p50, p95 in rows:
            print(f"{name:30s} n={cnt:5d} total={total:8.3f}s mean={mean*1e3:7.2f}ms "
                  f"p50={p50*1e3:7.2f}ms p95={p95*1e3:7.2f}ms")

        return final_x, log_rnd, final_rewards, score_vectors, sequences, directional_labels, confidences

    def consolidateBuffer(self):
        """
        TD3B version: includes directional labels and confidences.

        Returns:
            x_final: (N, L) sequence tokens
            log_rnd: (N,) log importance weights
            final_rewards: (N,) scalar rewards
            score_vectors: (N, K) score arrays
            sequences: List of N SMILES strings
            directional_labels: (N,) directional labels
            confidences: (N,) confidence scores
        """
        # Handle empty buffer case - return empty tensors/arrays
        if len(self.buffer) == 0:
            import logging
            logger = logging.getLogger(__name__)
            logger.warning("MCTS buffer is empty - no valid sequences found. Returning empty results.")

            # Return empty tensors/arrays with correct shapes
            # Use policy_model (set by base MCTS class) to get device
            device = self.policy_model.device if hasattr(self.policy_model, 'device') else 'cpu'
            return (
                torch.empty(0, 0, dtype=torch.long, device=device), # x_final: (0, 0)
                torch.empty(0, dtype=torch.float32, device=device), # log_rnd: (0,)
                np.empty(0, dtype=np.float32), # final_rewards: (0,)
                np.empty((0, 0), dtype=np.float32), # score_vectors: (0, 0)
                [], # sequences: empty list
                np.empty(0, dtype=np.float32), # directional_labels: (0,)
                np.empty(0, dtype=np.float32) # confidences: (0,)
            )

        x_final = []
        log_rnd = []
        final_rewards = []
        score_vectors = []
        sequences = []
        directional_labels = []
        confidences = []

        # Flatten buffer items into parallel columns; older buffer entries may
        # predate the TD3B fields, hence the .get defaults below.
        for item in self.buffer:
            x_final.append(item["x_final"])
            log_rnd.append(item["log_rnd"])
            final_rewards.append(item["final_reward"])
            score_vectors.append(item["score_vector"])
            sequences.append(item["seq"])
            directional_labels.append(item.get("directional_label", 0.0))
            confidences.append(item.get("confidence", 1.0))

        x_final = torch.stack(x_final, dim=0) # (N, L)
        log_rnd = torch.stack(log_rnd, dim=0).to(dtype=torch.float32) # (N,)
        final_rewards = np.stack(final_rewards, axis=0).astype(np.float32)
        score_vectors = np.stack(score_vectors, axis=0).astype(np.float32)
        directional_labels = np.array(directional_labels, dtype=np.float32)
        confidences = np.array(confidences, dtype=np.float32)

        return x_final, log_rnd, final_rewards, score_vectors, sequences, directional_labels, confidences
270
+
271
+
272
def create_td3b_mcts(
    args,
    diffusion_model,
    td3b_reward_function: TD3BRewardFunction,
    alpha: float = 0.1,
    **kwargs
) -> TD3B_MCTS:
    """
    Build a TD3B_MCTS instance with a default confidence-weighting module.

    Args:
        args: Configuration arguments.
        diffusion_model: MDLM model used for sampling.
        td3b_reward_function: Gated directional reward function.
        alpha: Temperature for the importance weighting.
        **kwargs: Forwarded to the TD3B_MCTS constructor.

    Returns:
        A configured TD3B_MCTS instance.
    """
    # Fixed confidence floor of 0.1 keeps log-confidence terms finite.
    weighting = TD3BConfidenceWeighting(alpha=alpha, min_confidence=0.1)

    return TD3B_MCTS(
        args=args,
        diffusion_model=diffusion_model,
        td3b_reward_function=td3b_reward_function,
        confidence_weighting=weighting,
        **kwargs
    )
td3b/td3b_scoring.py ADDED
@@ -0,0 +1,400 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ TD3B Scoring Functions
3
+ Implements gated allosteric reward combining affinity prediction and directional oracle.
4
+ """
5
+
6
+ import os
7
+ import torch
8
+ import numpy as np
9
+ from typing import List, Tuple, Optional
10
+ from .direction_oracle import DirectionalOracle
11
+ from scoring.functions.binding import BindingAffinity
12
+
13
+
14
class TD3BRewardFunction:
    """
    Implements the TD3B gated total reward with sigmoid temperature scaling:
        S_total(y; d*, x) = g_psi(y, x) * sigma(d* * (f_phi(y, x) - 0.5) / tau)

    where:
        - g_psi(y, x): affinity predictor (BindingAffinity)
        - sigma: sigmoid function sigma(z) = 1 / (1 + exp(-z))
        - d* in {+1, -1}: target direction (agonist/antagonist)
        - f_phi(y, x): directional oracle (DirectionalOracle), outputs
          p(agonist) in [0, 1]
        - tau: temperature parameter (lower = sharper gating)
        - y: peptide sequence
        - x: target protein sequence

    Note: a placeholder oracle that outputs 0.5 makes (f_phi - 0.5) = 0, i.e.
    a neutral gate sigma(0) = 0.5, during initial training before a real
    oracle is available.

    Benefits of the sigmoid formulation:
    1. Gate value always in [0, 1] -> bounded gated rewards
    2. Temperature tau controls sharpness of selection
    3. Differentiable gating for smooth optimization
    4. Sharper discrimination between aligned and misaligned directions

    OLD FORMULA (replaced):
        S_total(y; d*, x) = g_psi(y, x) * (d* * f_phi(y, x))
    """

    def __init__(
        self,
        affinity_predictor: BindingAffinity,
        directional_oracle: DirectionalOracle,
        target_direction: float,  # +1 for agonist, -1 for antagonist
        target_protein_tokens: torch.Tensor,
        peptide_tokenizer,
        device: torch.device,
        min_affinity_threshold: float = 0.0,  # Minimum g_psi for allosteric control
        use_confidence_weighting: bool = True,
        temperature: float = 0.1  # Temperature for sigmoid sharpening
    ):
        """
        Args:
            affinity_predictor: Pretrained g_psi model (BindingAffinity)
            directional_oracle: Pretrained f_phi model (DirectionalOracle)
            target_direction: d* in {+1, -1} for agonist/antagonist
            target_protein_tokens: Tokenized target protein sequence
            peptide_tokenizer: Tokenizer for converting SMILES to tokens
            device: Computation device
            min_affinity_threshold: Only apply directional control if g_psi > threshold
            use_confidence_weighting: Whether to use kappa(y) for importance weights
            temperature: Temperature tau for sigmoid sharpening (lower = sharper);
                default 0.1 makes the gate sharper than a standard sigmoid
        """
        self.g_psi = affinity_predictor      # Affinity predictor
        self.f_phi = directional_oracle      # Directional oracle
        self.target_direction = target_direction  # d* in {+1, -1}
        self.protein_tokens = target_protein_tokens
        self.peptide_tokenizer = peptide_tokenizer
        self.device = device
        self.min_affinity_threshold = min_affinity_threshold
        self.use_confidence_weighting = use_confidence_weighting
        self.temperature = temperature       # tau for sigmoid temperature

    def compute_affinity(self, peptide_seqs: List[str]) -> np.ndarray:
        """
        Compute binding affinity g_psi(y, x).

        Args:
            peptide_seqs: List of peptide SMILES strings
        Returns:
            affinities: (N,) array of affinity scores
        """
        affinities = self.g_psi(peptide_seqs)  # Returns list of scores
        return np.array(affinities)

    def compute_direction(self, peptide_seqs: List[str]) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Compute directional bias f_phi(y, x) and confidence kappa(y).

        Args:
            peptide_seqs: List of peptide SMILES strings
        Returns:
            directions: (N,) tensor of directional biases, i.e. p(agonist) in [0, 1]
            confidences: (N,) tensor of confidence scores in [0, 1]
        """
        # Tokenize peptides in a single batch for speed
        peptide_tokens = None
        peptide_token_dict = None
        try:
            peptide_token_dict = self.peptide_tokenizer(
                peptide_seqs,
                return_tensors='pt',
                padding=True
            )
            peptide_token_dict = {k: v.to(self.device) for k, v in peptide_token_dict.items()}
            peptide_tokens = peptide_token_dict.get('input_ids')
        except Exception:
            # FIX: discard any partially-built dict from the failed batch path so
            # the no_grad branch below cannot prefer stale/undevice'd tensors over
            # the per-sequence fallback tokens assembled here.
            peptide_token_dict = None
            peptide_tokens_list = []
            for seq in peptide_seqs:
                tokens = self.peptide_tokenizer(seq, return_tensors='pt', padding=True)
                peptide_tokens_list.append(tokens['input_ids'].to(self.device))

            # Batch tokenization (simple stacking, assumes same length after padding)
            try:
                peptide_tokens = torch.cat(peptide_tokens_list, dim=0)  # (N, L)
            except Exception:
                # Fallback: pad to max length
                max_len = max(t.size(1) for t in peptide_tokens_list)
                peptide_tokens = torch.zeros(len(peptide_tokens_list), max_len, dtype=torch.long, device=self.device)
                for i, tokens in enumerate(peptide_tokens_list):
                    peptide_tokens[i, :tokens.size(1)] = tokens[0]

        # Expand protein tokens to batch size
        protein_tokens = self.protein_tokens.expand(len(peptide_seqs), -1)  # (N, L_prot)

        # Compute direction and confidence
        with torch.no_grad():
            if peptide_token_dict is not None and hasattr(self.f_phi, "_normalize_token_dict"):
                directions, confidences = self.f_phi.predict_with_confidence(
                    peptide_token_dict, protein_tokens
                )
            else:
                directions, confidences = self.f_phi.predict_with_confidence(
                    peptide_tokens, protein_tokens
                )

        return directions, confidences

    def compute_gated_reward(
        self,
        peptide_seqs: List[str]
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
        """
        Compute gated total reward with sigmoid temperature scaling.

        FORMULA:
            S_total = g_psi * sigma(d* * (f_phi - 0.5) / tau)

        Where:
            - g_psi: affinity score
            - sigma: sigmoid function
            - d*: target direction (+1 or -1)
            - f_phi: directional oracle prediction, p(agonist) in [0, 1],
              so the centered score d* * (f_phi - 0.5) lies in [-0.5, +0.5]
            - tau: temperature (lower = sharper distribution)

        Args:
            peptide_seqs: List of peptide SMILES strings
        Returns:
            total_rewards: (N,) array of gated total rewards
            affinities: (N,) array of affinity scores g_psi
            confidences: (N,) array of confidence scores kappa
            directions: (N,) array of directional predictions f_phi
        """
        # Compute affinity g_psi(y, x)
        affinities = self.compute_affinity(peptide_seqs)  # (N,)

        # Compute directional bias f_phi(y, x) and confidence kappa(y)
        directions, confidences = self.compute_direction(peptide_seqs)  # (N,), (N,)
        directions = directions.cpu().numpy()
        confidences = confidences.cpu().numpy()

        # Sigmoid-based gated reward with temperature scaling.
        # 0.5 is used as the threshold to make the gate balanced/symmetric.
        directional_score = self.target_direction * (directions - 0.5)  # (N,) in [-0.5, +0.5]

        # Apply temperature scaling (lower tau -> sharper sigmoid)
        scaled_score = directional_score / self.temperature  # (N,)

        # FIX: clip the logit so np.exp never overflows for very small tau;
        # beyond |60| the sigmoid already saturates to 0/1 in double precision,
        # so clipping does not change the effective gate value.
        scaled_score = np.clip(scaled_score, -60.0, 60.0)

        # Apply sigmoid to get a gate value in [0, 1]: sigma(x) = 1 / (1 + exp(-x))
        sigmoid_weight = 1.0 / (1.0 + np.exp(-scaled_score))  # (N,) in [0, 1]

        # Gate affinity with the sigmoid weight
        gated_rewards = affinities * sigmoid_weight  # (N,)

        # Optional: only apply directional control if affinity is high enough.
        # This implements the "allosteric control only for binders" principle.
        low_affinity_mask = affinities < self.min_affinity_threshold
        gated_rewards[low_affinity_mask] = affinities[low_affinity_mask] * 0.1  # Downweight

        return gated_rewards, affinities, confidences, directions

    def __call__(
        self,
        input_seqs: List[str]
    ) -> Tuple[np.ndarray, dict]:
        """
        Main interface for reward computation.

        Args:
            input_seqs: List of peptide SMILES strings
        Returns:
            rewards: (N,) array of total rewards
            info: dict with 'affinities', 'confidences', 'directions', 'score_vectors'
        """
        total_rewards, affinities, confidences, directions = self.compute_gated_reward(input_seqs)

        info = {
            'affinities': affinities,
            'confidences': confidences,
            'directions': directions,  # Direction predictions
            'score_vectors': np.stack([affinities, total_rewards], axis=1)  # (N, 2)
        }

        return total_rewards, info
223
+
224
+
225
class TD3BConfidenceWeighting:
    """Confidence-weighted importance sampling for TD3B.

    Importance weights are modulated by the oracle confidence kappa(y):

        w(y) = kappa(y) * exp(S_total(y) / alpha)

    which separates full agonists/antagonists (high kappa, |f_phi| ~ 1) from
    partial ones (medium kappa, |f_phi| ~ 0.5) and non-selective binders
    (low kappa, |f_phi| ~ 0).
    """

    def __init__(
        self,
        alpha: float = 0.1,       # Temperature for reward scaling
        min_confidence: float = 0.1  # Floor so weights never collapse to zero
    ):
        """
        Args:
            alpha: Temperature parameter for reward scaling.
            min_confidence: Minimum confidence threshold applied before weighting.
        """
        self.alpha = alpha
        self.min_confidence = min_confidence

    def compute_importance_weights(
        self,
        rewards: np.ndarray,
        confidences: np.ndarray
    ) -> np.ndarray:
        """Return confidence-weighted importance weights.

        Args:
            rewards: (N,) array of total rewards S_total.
            confidences: (N,) array of confidence scores kappa in [0, 1].
        Returns:
            (N,) array of weights w(y) = kappa(y) * exp(S_total / alpha).
        """
        # Floor the confidences, then scale the exponentiated rewards by them.
        kappa = np.clip(confidences, self.min_confidence, None)
        return kappa * np.exp(rewards / self.alpha)

    def compute_log_importance_weights(
        self,
        rewards: np.ndarray,
        confidences: np.ndarray
    ) -> np.ndarray:
        """Return log importance weights (numerically stable form).

        Args:
            rewards: (N,) array of total rewards.
            confidences: (N,) array of confidence scores.
        Returns:
            (N,) array of log w(y) = log kappa(y) + S_total / alpha.
        """
        kappa = np.clip(confidences, self.min_confidence, None)
        return np.log(kappa) + rewards / self.alpha
295
+
296
+
297
+ # Factory function for creating TD3B reward function
298
def create_td3b_reward_function(
    affinity_predictor: BindingAffinity,
    target_protein_seq: str,
    target_direction: str,  # 'agonist' or 'antagonist'
    peptide_tokenizer,
    device: torch.device,
    directional_oracle: Optional[DirectionalOracle] = None,
    directional_oracle_checkpoint: Optional[str] = None,
    base_path: Optional[str] = None,
    direction_oracle_tr2d2_checkpoint: Optional[str] = None,
    direction_oracle_tokenizer_vocab: Optional[str] = None,
    direction_oracle_tokenizer_splits: Optional[str] = None,
    direction_oracle_esm_name: str = "facebook/esm2_t33_650M_UR50D",
    direction_oracle_esm_cache_dir: Optional[str] = None,
    direction_oracle_esm_local_files_only: bool = False,
    direction_oracle_max_ligand_length: int = 768,
    direction_oracle_max_protein_length: int = 1024,
    direction_oracle_d_model: int = 256,
    direction_oracle_n_heads: int = 4,
    direction_oracle_n_self_attn_layers: int = 1,
    direction_oracle_n_bmca_layers: int = 2,
    direction_oracle_dropout: float = 0.3,
    **kwargs
) -> TD3BRewardFunction:
    """Factory for a TD3BRewardFunction, building the oracle if needed.

    Args:
        affinity_predictor: Pretrained binding affinity model.
        target_protein_seq: Target protein amino acid sequence.
        target_direction: 'agonist' (+1) or 'antagonist' (-1); any other
            string falls back to agonist (+1).
        peptide_tokenizer: Tokenizer for peptides.
        device: Computation device.
        directional_oracle: Preloaded DirectionalOracle instance (optional).
        directional_oracle_checkpoint: Oracle checkpoint path (used only when
            no instance is given).
        base_path: Base path under which default oracle assets are resolved.
        direction_oracle_tr2d2_checkpoint: TR2-D2 checkpoint for the ligand encoder.
        direction_oracle_tokenizer_vocab: SMILES tokenizer vocab path.
        direction_oracle_tokenizer_splits: SMILES tokenizer splits path.
        **kwargs: Forwarded to TD3BRewardFunction.

    Returns:
        A TD3BRewardFunction ready to score peptide sequences.
    """
    if directional_oracle is None:
        # Resolve default asset locations relative to the tr2d2-pep root.
        if base_path is None:
            base_path = "To Be Added"
        root = os.path.join(base_path, "tr2d2-pep")

        if directional_oracle_checkpoint is None:
            directional_oracle_checkpoint = os.path.join(root, "direction_oracle.pt")
        if direction_oracle_tr2d2_checkpoint is None:
            direction_oracle_tr2d2_checkpoint = os.path.join(
                root, "pretrained", "peptune-pretrained.ckpt"
            )
        if direction_oracle_tokenizer_vocab is None:
            direction_oracle_tokenizer_vocab = os.path.join(
                root, "tokenizer", "new_vocab.txt"
            )
        if direction_oracle_tokenizer_splits is None:
            direction_oracle_tokenizer_splits = os.path.join(
                root, "tokenizer", "new_splits.txt"
            )

        directional_oracle = DirectionalOracle(
            model_ckpt=directional_oracle_checkpoint,
            tr2d2_checkpoint=direction_oracle_tr2d2_checkpoint,
            tokenizer_vocab=direction_oracle_tokenizer_vocab,
            tokenizer_splits=direction_oracle_tokenizer_splits,
            esm_name=direction_oracle_esm_name,
            d_model=direction_oracle_d_model,
            n_heads=direction_oracle_n_heads,
            n_self_attn_layers=direction_oracle_n_self_attn_layers,
            n_bmca_layers=direction_oracle_n_bmca_layers,
            dropout=direction_oracle_dropout,
            max_ligand_length=direction_oracle_max_ligand_length,
            max_protein_length=direction_oracle_max_protein_length,
            device=device,
            esm_cache_dir=direction_oracle_esm_cache_dir,
            esm_local_files_only=direction_oracle_esm_local_files_only,
        )

    # Inference-only usage.
    directional_oracle.eval()

    # Pre-encode the (fixed) target protein once.
    protein_tokens = directional_oracle.encode_protein(target_protein_seq)

    # Map the direction string onto d* in {+1, -1}; unknown strings -> +1.
    d_star = -1.0 if target_direction.lower() == 'antagonist' else +1.0

    return TD3BRewardFunction(
        affinity_predictor=affinity_predictor,
        directional_oracle=directional_oracle,
        target_direction=d_star,
        target_protein_tokens=protein_tokens,
        peptide_tokenizer=peptide_tokenizer,
        device=device,
        **kwargs
    )
tokenizer/my_tokenizers.py ADDED
@@ -0,0 +1,424 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import collections
2
+ import os
3
+ import re
4
+ from typing import List, Optional
5
+ from transformers import PreTrainedTokenizer
6
+ from SmilesPE.tokenizer import SPE_Tokenizer
7
+ import torch
8
+
9
def load_vocab(vocab_file):
    """Load a vocabulary file into an ordered token -> index mapping.

    Each line of the file holds one token; a token's id is its 0-based line
    number. The file is iterated lazily instead of materializing every line
    with ``readlines()``.

    Args:
        vocab_file: Path to a UTF-8 text file with one token per line.

    Returns:
        collections.OrderedDict mapping token string to integer index.
    """
    vocab = collections.OrderedDict()
    with open(vocab_file, "r", encoding="utf-8") as reader:
        for index, token in enumerate(reader):
            # Strip only the trailing newline; other whitespace is significant.
            vocab[token.rstrip("\n")] = index
    return vocab
18
+
19
class Atomwise_Tokenizer(object):
    """Atom-level SMILES tokenizer.

    Splits a SMILES string into atom/bond/ring-closure tokens using a single
    precompiled regular expression.
    """

    def __init__(self):
        """Compile the atom-level tokenization pattern.

        The alternation matches, in priority order: short parenthesized
        groups (<= 4 chars inside), bracket atoms, two-letter halogens,
        single-letter atoms (aliphatic and aromatic), and SMILES punctuation
        (bonds, branches, ring bonds, stereo markers, digits).
        """
        self.regex_pattern = r"(\([^\(\)]{0,4}\)|\[[^\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\(|\)|\.|=|#|-|\+|\\|\/\/?|:|~|@|\?|>>?|\*|\$|\%[0-9]{2}|[0-9])"
        self.regex = re.compile(self.regex_pattern)

    def tokenize(self, text):
        """Return the list of atom-level tokens found in ``text``."""
        # findall on a single-group pattern already yields a fresh list of
        # matched token strings.
        return self.regex.findall(text)
35
+
36
class SMILES_SPE_Tokenizer(PreTrainedTokenizer):
    r"""
    Constructs a SMILES tokenizer. Based on SMILES Pair Encoding (https://github.com/XinhaoLi74/SmilesPE).
    This tokenizer inherits from :class:`~transformers.PreTrainedTokenizer` which contains most of the methods. Users
    should refer to the superclass for more information regarding methods.
    Args:
        vocab_file (:obj:`string`):
            File containing the vocabulary.
        spe_file (:obj:`string`):
            File containing the trained SMILES Pair Encoding vocabulary.
        unk_token (:obj:`string`, `optional`, defaults to "[UNK]"):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
            token instead.
        sep_token (:obj:`string`, `optional`, defaults to "[SEP]"):
            The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences
            for sequence classification or for a text and a question for question answering.
            It is also used as the last token of a sequence built with special tokens.
        pad_token (:obj:`string`, `optional`, defaults to "[PAD]"):
            The token used for padding, for example when batching sequences of different lengths.
        cls_token (:obj:`string`, `optional`, defaults to "[CLS]"):
            The classifier token which is used when doing sequence classification (classification of the whole
            sequence instead of per-token classification). It is the first token of the sequence when built with
            special tokens.
        mask_token (:obj:`string`, `optional`, defaults to "[MASK]"):
            The token used for masking values. This is the token used when training this model with masked language
            modeling. This is the token which the model will try to predict.
    """

    def __init__(self, vocab_file, spe_file,
                 unk_token="[UNK]",
                 sep_token="[SEP]",
                 pad_token="[PAD]",
                 cls_token="[CLS]",
                 mask_token="[MASK]",
                 **kwargs):
        if not os.path.isfile(vocab_file):
            raise ValueError("Can't find a vocabulary file at path '{}'.".format(vocab_file))
        if not os.path.isfile(spe_file):
            raise ValueError("Can't find a SPE vocabulary file at path '{}'.".format(spe_file))

        self.vocab = load_vocab(vocab_file)
        self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()])
        # FIX: open the SPE merges file in a context manager so the handle is
        # closed once SPE_Tokenizer has consumed it (previously the open file
        # object was stored on the instance and never closed).
        with open(spe_file, 'r', encoding='utf-8') as spe_vocab:
            self.spe_tokenizer = SPE_Tokenizer(spe_vocab)

        super().__init__(
            unk_token=unk_token,
            sep_token=sep_token,
            pad_token=pad_token,
            cls_token=cls_token,
            mask_token=mask_token,
            **kwargs)

    @property
    def vocab_size(self):
        """Size of the base vocabulary (without added tokens)."""
        return len(self.vocab)

    def get_vocab(self):
        """Return the full vocabulary including added tokens."""
        return dict(self.vocab, **self.added_tokens_encoder)

    def _tokenize(self, text):
        # SPE_Tokenizer returns a single space-joined string of merges.
        return self.spe_tokenizer.tokenize(text).split(' ')

    def _convert_token_to_id(self, token):
        """ Converts a token (str) in an id using the vocab. """
        return self.vocab.get(token, self.vocab.get(self.unk_token))

    # changed encode and decode functions
    def encode(self, token_array):
        """Encode a pre-tokenized sequence into BOS + ids + EOS tensors.

        NOTE(review): ids 2 and 3 are hard-coded as the BOS/EOS ids — assumed
        to match their positions in new_vocab.txt; confirm if the vocab changes.
        """
        token_ids = [2]  # BOS
        for token in token_array:
            token_ids.append(self._convert_token_to_id(token))
        token_ids.append(3)  # EOS
        token_ids = torch.tensor([token_ids])
        attn_mask = torch.ones_like(token_ids)
        return {'input_ids': token_ids, 'attention_mask': attn_mask}

    def decode(self, token_ids, skip_special_tokens=True):
        """Decode a (1, L) or (L,) tensor of ids back into a SMILES string."""
        token_ids = token_ids.squeeze(0).cpu().tolist()
        token_array = []
        for idx in token_ids:
            if idx == 3:  # Stop decoding when the EOS id is encountered
                break
            if skip_special_tokens and idx in self.all_special_ids:
                continue
            token = self._convert_id_to_token(idx)
            token_array.append(token)
        sequence = "".join(token_array)
        return sequence

    def batch_decode(self, batch_token_ids, skip_special_tokens=True):
        """Decode a batch of id tensors into SMILES strings."""
        sequences = []
        for token_ids in batch_token_ids:
            # FIX: forward skip_special_tokens (it was silently dropped before).
            sequences.append(self.decode(token_ids, skip_special_tokens=skip_special_tokens))
        return sequences

    def get_token_split(self, token_ids):
        """Convert a batch of id sequences into lists of token strings."""
        if isinstance(token_ids, torch.Tensor):
            token_ids = token_ids.cpu().tolist()

        token_array = []
        for seq_ids in token_ids:
            seq_array = []
            for token_id in seq_ids:
                seq_array.append(self._convert_id_to_token(token_id))
            token_array.append(seq_array)

        return token_array

    def _convert_id_to_token(self, index):
        """Converts an index (integer) in a token (str) using the vocab."""
        return self.ids_to_tokens.get(index, self.unk_token)

    def convert_tokens_to_string(self, tokens):
        """ Converts a sequence of tokens (string) in a single string. """
        out_string = " ".join(tokens).replace(" ##", "").strip()
        return out_string

    def build_inputs_with_special_tokens(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Build model inputs from a sequence or a pair of sequence for sequence classification tasks
        by concatenating and adding special tokens.
        A BERT sequence has the following format:
        - single sequence: ``[CLS] X [SEP]``
        - pair of sequences: ``[CLS] A [SEP] B [SEP]``
        Args:
            token_ids_0 (:obj:`List[int]`):
                List of IDs to which the special tokens will be added
            token_ids_1 (:obj:`List[int]`, `optional`, defaults to :obj:`None`):
                Optional second list of IDs for sequence pairs.
        Returns:
            :obj:`List[int]`: list of `input IDs <../glossary.html#input-ids>`__ with the appropriate special tokens.
        """
        if token_ids_1 is None:
            return [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
        cls = [self.cls_token_id]
        sep = [self.sep_token_id]
        return cls + token_ids_0 + sep + token_ids_1 + sep

    def get_special_tokens_mask(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
    ) -> List[int]:
        """
        Retrieves sequence ids from a token list that has no special tokens added. This method is called when adding
        special tokens using the tokenizer ``prepare_for_model`` method.
        Args:
            token_ids_0 (:obj:`List[int]`):
                List of ids.
            token_ids_1 (:obj:`List[int]`, `optional`, defaults to :obj:`None`):
                Optional second list of IDs for sequence pairs.
            already_has_special_tokens (:obj:`bool`, `optional`, defaults to :obj:`False`):
                Set to True if the token list is already formatted with special tokens for the model
        Returns:
            :obj:`List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
        """

        if already_has_special_tokens:
            if token_ids_1 is not None:
                raise ValueError(
                    "You should not supply a second sequence if the provided sequence of "
                    "ids is already formated with special tokens for the model."
                )
            return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0))

        if token_ids_1 is not None:
            return [1] + ([0] * len(token_ids_0)) + [1] + ([0] * len(token_ids_1)) + [1]
        return [1] + ([0] * len(token_ids_0)) + [1]

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Creates a mask from the two sequences passed to be used in a sequence-pair classification task.
        A BERT sequence pair mask has the following format:
        ::
            0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
            | first sequence | second sequence |
        if token_ids_1 is None, only returns the first portion of the mask (0's).
        Args:
            token_ids_0 (:obj:`List[int]`):
                List of ids.
            token_ids_1 (:obj:`List[int]`, `optional`, defaults to :obj:`None`):
                Optional second list of IDs for sequence pairs.
        Returns:
            :obj:`List[int]`: List of `token type IDs <../glossary.html#token-type-ids>`_ according to the given
            sequence(s).
        """
        sep = [self.sep_token_id]
        cls = [self.cls_token_id]
        if token_ids_1 is None:
            return len(cls + token_ids_0 + sep) * [0]
        return len(cls + token_ids_0 + sep) * [0] + len(token_ids_1 + sep) * [1]

    def save_vocabulary(self, vocab_path):
        """
        Save the sentencepiece vocabulary (copy original file) and special tokens file to a directory.
        Args:
            vocab_path (:obj:`str`):
                The directory in which to save the vocabulary.
        Returns:
            :obj:`Tuple(str)`: Paths to the files saved.
        """
        index = 0
        vocab_file = vocab_path
        with open(vocab_file, "w", encoding="utf-8") as writer:
            for token, token_index in sorted(self.vocab.items(), key=lambda kv: kv[1]):
                if index != token_index:
                    index = token_index
                writer.write(token + "\n")
                index += 1
        return (vocab_file,)
252
+
253
class SMILES_Atomwise_Tokenizer(PreTrainedTokenizer):
    r"""
    Constructs a SMILES tokenizer. Based on SMILES Pair Encoding (https://github.com/XinhaoLi74/SmilesPE).
    This tokenizer inherits from :class:`~transformers.PreTrainedTokenizer` which contains most of the methods. Users
    should refer to the superclass for more information regarding methods.
    Args:
        vocab_file (:obj:`string`):
            File containing the vocabulary.
        unk_token (:obj:`string`, `optional`, defaults to "[UNK]"):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
            token instead.
        sep_token (:obj:`string`, `optional`, defaults to "[SEP]"):
            The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences
            for sequence classification or for a text and a question for question answering.
            It is also used as the last token of a sequence built with special tokens.
        pad_token (:obj:`string`, `optional`, defaults to "[PAD]"):
            The token used for padding, for example when batching sequences of different lengths.
        cls_token (:obj:`string`, `optional`, defaults to "[CLS]"):
            The classifier token which is used when doing sequence classification (classification of the whole
            sequence instead of per-token classification). It is the first token of the sequence when built with
            special tokens.
        mask_token (:obj:`string`, `optional`, defaults to "[MASK]"):
            The token used for masking values. This is the token used when training this model with masked language
            modeling. This is the token which the model will try to predict.
    """

    def __init__(
        self,
        vocab_file,
        unk_token="[UNK]",
        sep_token="[SEP]",
        pad_token="[PAD]",
        cls_token="[CLS]",
        mask_token="[MASK]",
        **kwargs
    ):
        # FIX: load the vocabulary BEFORE calling super().__init__(), matching
        # SMILES_SPE_Tokenizer. Recent transformers versions touch
        # get_vocab()/added-token machinery during PreTrainedTokenizer.__init__,
        # which would fail if self.vocab is not yet set.
        if not os.path.isfile(vocab_file):
            raise ValueError(
                "Can't find a vocabulary file at path '{}'.".format(vocab_file)
            )
        self.vocab = load_vocab(vocab_file)
        self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()])
        self.tokenizer = Atomwise_Tokenizer()

        super().__init__(
            unk_token=unk_token,
            sep_token=sep_token,
            pad_token=pad_token,
            cls_token=cls_token,
            mask_token=mask_token,
            **kwargs,
        )

    @property
    def vocab_size(self):
        """Size of the base vocabulary (without added tokens)."""
        return len(self.vocab)

    def get_vocab(self):
        """Return the full vocabulary including added tokens."""
        return dict(self.vocab, **self.added_tokens_encoder)

    def _tokenize(self, text):
        # Delegate to the atom-level regex tokenizer.
        return self.tokenizer.tokenize(text)

    def _convert_token_to_id(self, token):
        """ Converts a token (str) in an id using the vocab. """
        return self.vocab.get(token, self.vocab.get(self.unk_token))

    def _convert_id_to_token(self, index):
        """Converts an index (integer) in a token (str) using the vocab."""
        return self.ids_to_tokens.get(index, self.unk_token)

    def convert_tokens_to_string(self, tokens):
        """ Converts a sequence of tokens (string) in a single string. """
        out_string = " ".join(tokens).replace(" ##", "").strip()
        return out_string

    def build_inputs_with_special_tokens(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Build model inputs from a sequence or a pair of sequence for sequence classification tasks
        by concatenating and adding special tokens.
        A BERT sequence has the following format:
        - single sequence: ``[CLS] X [SEP]``
        - pair of sequences: ``[CLS] A [SEP] B [SEP]``
        Args:
            token_ids_0 (:obj:`List[int]`):
                List of IDs to which the special tokens will be added
            token_ids_1 (:obj:`List[int]`, `optional`, defaults to :obj:`None`):
                Optional second list of IDs for sequence pairs.
        Returns:
            :obj:`List[int]`: list of `input IDs <../glossary.html#input-ids>`__ with the appropriate special tokens.
        """
        if token_ids_1 is None:
            return [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
        cls = [self.cls_token_id]
        sep = [self.sep_token_id]
        return cls + token_ids_0 + sep + token_ids_1 + sep

    def get_special_tokens_mask(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
    ) -> List[int]:
        """
        Retrieves sequence ids from a token list that has no special tokens added. This method is called when adding
        special tokens using the tokenizer ``prepare_for_model`` method.
        Args:
            token_ids_0 (:obj:`List[int]`):
                List of ids.
            token_ids_1 (:obj:`List[int]`, `optional`, defaults to :obj:`None`):
                Optional second list of IDs for sequence pairs.
            already_has_special_tokens (:obj:`bool`, `optional`, defaults to :obj:`False`):
                Set to True if the token list is already formatted with special tokens for the model
        Returns:
            :obj:`List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
        """

        if already_has_special_tokens:
            if token_ids_1 is not None:
                raise ValueError(
                    "You should not supply a second sequence if the provided sequence of "
                    "ids is already formated with special tokens for the model."
                )
            return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0))

        if token_ids_1 is not None:
            return [1] + ([0] * len(token_ids_0)) + [1] + ([0] * len(token_ids_1)) + [1]
        return [1] + ([0] * len(token_ids_0)) + [1]

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Creates a mask from the two sequences passed to be used in a sequence-pair classification task.
        A BERT sequence pair mask has the following format:
        ::
            0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
            | first sequence | second sequence |
        if token_ids_1 is None, only returns the first portion of the mask (0's).
        Args:
            token_ids_0 (:obj:`List[int]`):
                List of ids.
            token_ids_1 (:obj:`List[int]`, `optional`, defaults to :obj:`None`):
                Optional second list of IDs for sequence pairs.
        Returns:
            :obj:`List[int]`: List of `token type IDs <../glossary.html#token-type-ids>`_ according to the given
            sequence(s).
        """
        sep = [self.sep_token_id]
        cls = [self.cls_token_id]
        if token_ids_1 is None:
            return len(cls + token_ids_0 + sep) * [0]
        return len(cls + token_ids_0 + sep) * [0] + len(token_ids_1 + sep) * [1]

    def save_vocabulary(self, vocab_path):
        """
        Save the sentencepiece vocabulary (copy original file) and special tokens file to a directory.
        Args:
            vocab_path (:obj:`str`):
                The directory in which to save the vocabulary.
        Returns:
            :obj:`Tuple(str)`: Paths to the files saved.
        """
        index = 0
        vocab_file = vocab_path
        with open(vocab_file, "w", encoding="utf-8") as writer:
            for token, token_index in sorted(self.vocab.items(), key=lambda kv: kv[1]):
                if index != token_index:
                    index = token_index
                writer.write(token + "\n")
                index += 1
        return (vocab_file,)
tokenizer/new_splits.txt ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ c 1
2
+ c 2
3
+ c 3
4
+ c 4
5
+ c 5
6
+ c 6
7
+ c 7
8
+ c 8
9
+ c 9
10
+ ( c1
11
+ ( c2
12
+ c1 )
13
+ c2 )
14
+ n 1
15
+ n 2
16
+ n 3
17
+ n 4
18
+ n 5
19
+ n 6
20
+ n 7
21
+ n 8
22
+ n 9
23
+ ( n1
24
+ ( n2
25
+ n1 )
26
+ n2 )
27
+ O 1
28
+ O 2
29
+ O 3
30
+ O 4
31
+ O 5
32
+ O 6
33
+ O 7
34
+ O 8
35
+ O 9
36
+ ( O1
37
+ ( O2
38
+ O2 )
39
+ O2 )
40
+ = O
41
+ = C
42
+ = c
43
+ = N
44
+ = n
45
+ =C C
46
+ =C N
47
+ =C c
48
+ =c c
49
+ =N C
50
+ =N c
51
+ =n C
52
+ =n c
53
+ # N
54
+ # C
55
+ #N C
56
+ #C C
57
+ #C N
58
+ #N N
59
+ ( C
60
+ C )
61
+ ( O
62
+ O )
63
+ ( N
64
+ N )
65
+ Br c
66
+ ( =O
67
+ (=O )
68
+ C (=O)
69
+ C =O
70
+ C =N
71
+ C #N
72
+ C #C
73
+ C C
74
+ CC C
75
+ CC N
76
+ CC O
77
+ CC S
78
+ CC c
79
+ CC n
80
+ C N
81
+ CN C
82
+ CN c
83
+ C O
84
+ CO C
85
+ CO N
86
+ CO c
87
+ C S
88
+ CS C
89
+ CS S
90
+ CS c
91
+ C c
92
+ Cl c
93
+ C n
94
+ F c
95
+ N C
96
+ NC C
97
+ NC c
98
+ N N
99
+ N O
100
+ N c
101
+ N n
102
+ O C
103
+ OC C
104
+ OC O
105
+ OC c
106
+ O N
107
+ O O
108
+ O c
109
+ S C
110
+ SC C
111
+ SC c
112
+ S S
113
+ S c
114
+ c c
115
+ cc c
116
+ cc n
117
+ cc o
118
+ cc s
119
+ cc cc
120
+ c n
121
+ cn c
122
+ cn n
123
+ c o
124
+ co c
125
+ c s
126
+ cs c
127
+ cs n
128
+ n c
129
+ nc c
130
+ nc n
131
+ nc o
132
+ nc s
133
+ n n
134
+ nn c
135
+ nn n
136
+ n o
137
+ no c
138
+ no n
139
+ n s
140
+ ns c
141
+ ns n
142
+ o c
143
+ oc c
144
+ o n
145
+ s c
146
+ sc c
147
+ sc n
148
+ s n
149
+ N P
150
+ P N
151
+ C P
152
+ P C
153
+ N S
154
+ S N
155
+ C S
156
+ S C
157
+ S P
158
+ P S
159
+ C I
tokenizer/new_vocab.txt ADDED
@@ -0,0 +1,587 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [PAD]
2
+ [UNK]
3
+ [CLS]
4
+ [SEP]
5
+ [MASK]
6
+ #
7
+ %
8
+ (
9
+ )
10
+ +
11
+ -
12
+ /
13
+ 0
14
+ 1
15
+ 2
16
+ 3
17
+ 4
18
+ 5
19
+ 6
20
+ 7
21
+ 8
22
+ 9
23
+ =
24
+ @
25
+ A
26
+ B
27
+ Br
28
+ Brc
29
+ C
30
+ CC
31
+ CCC
32
+ CCN
33
+ CCO
34
+ CCS
35
+ CCc
36
+ CCn
37
+ CN
38
+ CNC
39
+ CNc
40
+ CO
41
+ COC
42
+ CON
43
+ COc
44
+ CS
45
+ CSC
46
+ CSS
47
+ CSc
48
+ Cc
49
+ Cl
50
+ Clc
51
+ Cn
52
+ F
53
+ Fc
54
+ H
55
+ I
56
+ K
57
+ L
58
+ M
59
+ N
60
+ NC
61
+ NCC
62
+ NCc
63
+ NN
64
+ NO
65
+ Nc
66
+ Nn
67
+ O
68
+ OC
69
+ OCC
70
+ OCO
71
+ OCc
72
+ ON
73
+ OO
74
+ Oc
75
+ P
76
+ R
77
+ S
78
+ SC
79
+ SCC
80
+ SCc
81
+ SS
82
+ Sc
83
+ T
84
+ X
85
+ Z
86
+ [
87
+ \\
88
+ (/
89
+ ]
90
+ a
91
+ b
92
+ c
93
+ cc
94
+ ccc
95
+ cccc
96
+ ccn
97
+ cco
98
+ ccs
99
+ cn
100
+ cnc
101
+ cnn
102
+ co
103
+ coc
104
+ cs
105
+ csc
106
+ csn
107
+ e
108
+ g
109
+ i
110
+ l
111
+ n
112
+ nc
113
+ ncc
114
+ ncn
115
+ nco
116
+ ncs
117
+ nn
118
+ nnc
119
+ nnn
120
+ no
121
+ noc
122
+ non
123
+ ns
124
+ nsc
125
+ nsn
126
+ o
127
+ oc
128
+ occ
129
+ on
130
+ p
131
+ r
132
+ s
133
+ sc
134
+ scc
135
+ scn
136
+ sn
137
+ t
138
+ c1
139
+ c2
140
+ c3
141
+ c4
142
+ c5
143
+ c6
144
+ c7
145
+ c8
146
+ c9
147
+ n1
148
+ n2
149
+ n3
150
+ n4
151
+ n5
152
+ n6
153
+ n7
154
+ n8
155
+ n9
156
+ O1
157
+ O2
158
+ O3
159
+ O4
160
+ O5
161
+ O6
162
+ O7
163
+ O8
164
+ O9
165
+ (c1
166
+ (c2
167
+ c1)
168
+ c2)
169
+ (n1
170
+ (n2
171
+ n1)
172
+ n2)
173
+ (O1
174
+ (O2
175
+ O2)
176
+ =O
177
+ =C
178
+ =c
179
+ =N
180
+ =n
181
+ =CC
182
+ =CN
183
+ =Cc
184
+ =cc
185
+ =NC
186
+ =Nc
187
+ =nC
188
+ =nc
189
+ #C
190
+ #CC
191
+ #CN
192
+ #N
193
+ #NC
194
+ #NN
195
+ (C
196
+ C)
197
+ (O
198
+ O)
199
+ (N
200
+ N)
201
+ NP
202
+ PN
203
+ CP
204
+ PC
205
+ NS
206
+ SN
207
+ SP
208
+ PS
209
+ C(=O)
210
+ (/Br)
211
+ (/C#N)
212
+ (/C)
213
+ (/C=N)
214
+ (/C=O)
215
+ (/CBr)
216
+ (/CC)
217
+ (/CCC)
218
+ (/CCF)
219
+ (/CCN)
220
+ (/CCO)
221
+ (/CCl)
222
+ (/CI)
223
+ (/CN)
224
+ (/CO)
225
+ (/CS)
226
+ (/Cl)
227
+ (/F)
228
+ (/I)
229
+ (/N)
230
+ (/NC)
231
+ (/NCC)
232
+ (/NO)
233
+ (/O)
234
+ (/OC)
235
+ (/OCC)
236
+ (/S)
237
+ (/SC)
238
+ (=C)
239
+ (=C/C)
240
+ (=C/F)
241
+ (=C/I)
242
+ (=C/N)
243
+ (=C/O)
244
+ (=CBr)
245
+ (=CC)
246
+ (=CCF)
247
+ (=CCN)
248
+ (=CCO)
249
+ (=CCl)
250
+ (=CF)
251
+ (=CI)
252
+ (=CN)
253
+ (=CO)
254
+ (=C\\C)
255
+ (=C\\F)
256
+ (=C\\I)
257
+ (=C\\N)
258
+ (=C\\O)
259
+ (=N)
260
+ (=N/C)
261
+ (=N/N)
262
+ (=N/O)
263
+ (=NBr)
264
+ (=NC)
265
+ (=NCC)
266
+ (=NCl)
267
+ (=NN)
268
+ (=NO)
269
+ (=NOC)
270
+ (=N\\C)
271
+ (=N\\N)
272
+ (=N\\O)
273
+ (=O)
274
+ (=S)
275
+ (B)
276
+ (Br)
277
+ (C#C)
278
+ (C#CC)
279
+ (C#CI)
280
+ (C#CO)
281
+ (C#N)
282
+ (C#SN)
283
+ (C)
284
+ (C=C)
285
+ (C=CF)
286
+ (C=CI)
287
+ (C=N)
288
+ (C=NN)
289
+ (C=NO)
290
+ (C=O)
291
+ (C=S)
292
+ (CBr)
293
+ (CC#C)
294
+ (CC#N)
295
+ (CC)
296
+ (CC=C)
297
+ (CC=O)
298
+ (CCBr)
299
+ (CCC)
300
+ (CCCC)
301
+ (CCCF)
302
+ (CCCI)
303
+ (CCCN)
304
+ (CCCO)
305
+ (CCCS)
306
+ (CCCl)
307
+ (CCF)
308
+ (CCI)
309
+ (CCN)
310
+ (CCNC)
311
+ (CCNN)
312
+ (CCNO)
313
+ (CCO)
314
+ (CCOC)
315
+ (CCON)
316
+ (CCS)
317
+ (CCSC)
318
+ (CCl)
319
+ (CF)
320
+ (CI)
321
+ (CN)
322
+ (CN=O)
323
+ (CNC)
324
+ (CNCC)
325
+ (CNCO)
326
+ (CNN)
327
+ (CNNC)
328
+ (CNO)
329
+ (CNOC)
330
+ (CO)
331
+ (COC)
332
+ (COCC)
333
+ (COCI)
334
+ (COCN)
335
+ (COCO)
336
+ (COF)
337
+ (CON)
338
+ (COO)
339
+ (CS)
340
+ (CSC)
341
+ (CSCC)
342
+ (CSCF)
343
+ (CSO)
344
+ (Cl)
345
+ (F)
346
+ (I)
347
+ (N)
348
+ (N=N)
349
+ (N=NO)
350
+ (N=O)
351
+ (N=S)
352
+ (NBr)
353
+ (NC#N)
354
+ (NC)
355
+ (NC=N)
356
+ (NC=O)
357
+ (NC=S)
358
+ (NCBr)
359
+ (NCC)
360
+ (NCCC)
361
+ (NCCF)
362
+ (NCCN)
363
+ (NCCO)
364
+ (NCCS)
365
+ (NCCl)
366
+ (NCNC)
367
+ (NCO)
368
+ (NCS)
369
+ (NCl)
370
+ (NN)
371
+ (NN=O)
372
+ (NNC)
373
+ (NO)
374
+ (NOC)
375
+ (O)
376
+ (OC#N)
377
+ (OC)
378
+ (OC=C)
379
+ (OC=O)
380
+ (OC=S)
381
+ (OCBr)
382
+ (OCC)
383
+ (OCCC)
384
+ (OCCF)
385
+ (OCCI)
386
+ (OCCN)
387
+ (OCCO)
388
+ (OCCS)
389
+ (OCCl)
390
+ (OCF)
391
+ (OCI)
392
+ (OCO)
393
+ (OCOC)
394
+ (OCON)
395
+ (OCSC)
396
+ (OCl)
397
+ (OI)
398
+ (ON)
399
+ (OO)
400
+ (OOC)
401
+ (OOCC)
402
+ (OOSN)
403
+ (OSC)
404
+ (P)
405
+ (S)
406
+ (SC#N)
407
+ (SC)
408
+ (SCC)
409
+ (SCCC)
410
+ (SCCF)
411
+ (SCCN)
412
+ (SCCO)
413
+ (SCCS)
414
+ (SCCl)
415
+ (SCF)
416
+ (SCN)
417
+ (SCOC)
418
+ (SCSC)
419
+ (SCl)
420
+ (SI)
421
+ (SN)
422
+ (SN=O)
423
+ (SO)
424
+ (SOC)
425
+ (SOOO)
426
+ (SS)
427
+ (SSC)
428
+ (SSCC)
429
+ ([At])
430
+ ([O-])
431
+ ([O])
432
+ ([S-])
433
+ (\\Br)
434
+ (\\C#N)
435
+ (\\C)
436
+ (\\C=N)
437
+ (\\C=O)
438
+ (\\CBr)
439
+ (\\CC)
440
+ (\\CCC)
441
+ (\\CCO)
442
+ (\\CCl)
443
+ (\\CF)
444
+ (\\CN)
445
+ (\\CNC)
446
+ (\\CO)
447
+ (\\COC)
448
+ (\\Cl)
449
+ (\\F)
450
+ (\\I)
451
+ (\\N)
452
+ (\\NC)
453
+ (\\NCC)
454
+ (\\NN)
455
+ (\\NO)
456
+ (\\NOC)
457
+ (\\O)
458
+ (\\OC)
459
+ (\\OCC)
460
+ (\\ON)
461
+ (\\S)
462
+ (\\SC)
463
+ (\\SCC)
464
+ [Ag+]
465
+ [Ag-4]
466
+ [Ag]
467
+ [Al-3]
468
+ [Al]
469
+ [As+]
470
+ [AsH3]
471
+ [AsH]
472
+ [As]
473
+ [At]
474
+ [B-]
475
+ [B@-]
476
+ [B@@-]
477
+ [BH-]
478
+ [BH2-]
479
+ [BH3-]
480
+ [B]
481
+ [Ba]
482
+ [Br+2]
483
+ [BrH]
484
+ [Br]
485
+ [C+]
486
+ [C-]
487
+ [C@@H]
488
+ [C@@]
489
+ [C@H]
490
+ [C@]
491
+ [CH-]
492
+ [CH2]
493
+ [CH3]
494
+ [CH]
495
+ [C]
496
+ [CaH2]
497
+ [Ca]
498
+ [Cl+2]
499
+ [Cl+3]
500
+ [Cl+]
501
+ [Cs]
502
+ [FH]
503
+ [F]
504
+ [H]
505
+ [He]
506
+ [I+2]
507
+ [I+3]
508
+ [I+]
509
+ [IH]
510
+ [I]
511
+ [K]
512
+ [Kr]
513
+ [Li+]
514
+ [LiH]
515
+ [MgH2]
516
+ [Mg]
517
+ [N+]
518
+ [N-]
519
+ [N@+]
520
+ [N@@+]
521
+ [N@@]
522
+ [N@]
523
+ [NH+]
524
+ [NH-]
525
+ [NH2+]
526
+ [NH3]
527
+ [NH]
528
+ [N]
529
+ [Na]
530
+ [O+]
531
+ [O-]
532
+ [OH+]
533
+ [OH2]
534
+ [OH]
535
+ [O]
536
+ [P+]
537
+ [P@+]
538
+ [P@@+]
539
+ [P@@]
540
+ [P@]
541
+ [PH2]
542
+ [PH]
543
+ [P]
544
+ [Ra]
545
+ [Rb]
546
+ [S+]
547
+ [S-]
548
+ [S@+]
549
+ [S@@+]
550
+ [S@@]
551
+ [S@]
552
+ [SH+]
553
+ [SH2]
554
+ [SH]
555
+ [S]
556
+ [Se+]
557
+ [Se-2]
558
+ [SeH2]
559
+ [SeH]
560
+ [Se]
561
+ [Si@]
562
+ [SiH2]
563
+ [SiH]
564
+ [Si]
565
+ [SrH2]
566
+ [TeH]
567
+ [Te]
568
+ [Xe]
569
+ [Zn+2]
570
+ [Zn-2]
571
+ [Zn]
572
+ [b-]
573
+ [c+]
574
+ [c-]
575
+ [cH-]
576
+ [cH]
577
+ [c]
578
+ [n+]
579
+ [n-]
580
+ [nH]
581
+ [n]
582
+ [o+]
583
+ [s+]
584
+ [se+]
585
+ [se]
586
+ [te+]
587
+ [te]
utils/app.py ADDED
@@ -0,0 +1,1287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import pandas as pd
4
+ from io import StringIO
5
+ import rdkit
6
+ from rdkit import Chem
7
+ from rdkit.Chem import AllChem, Draw
8
+ import numpy as np
9
+ from PIL import Image, ImageDraw, ImageFont
10
+ import matplotlib.pyplot as plt
11
+ import matplotlib.patches as patches
12
+ from io import BytesIO
13
+ import tempfile
14
+ from rdkit import Chem
15
+
16
+ class PeptideAnalyzer:
17
+ def __init__(self):
18
+ self.bond_patterns = [
19
+ (r'OC\(=O\)', 'ester'), # Ester bond
20
+ (r'N\(C\)C\(=O\)', 'n_methyl'), # N-methylated peptide bond
21
+ (r'N[0-9]C\(=O\)', 'proline'), # Proline peptide bond
22
+ (r'NC\(=O\)', 'peptide'), # Standard peptide bond
23
+ (r'C\(=O\)N\(C\)', 'n_methyl_reverse'), # Reverse N-methylated
24
+ (r'C\(=O\)N[12]?', 'peptide_reverse') # Reverse peptide bond
25
+ ]
26
+ # Three to one letter code mapping
27
+ self.three_to_one = {
28
+ 'Ala': 'A', 'Cys': 'C', 'Asp': 'D', 'Glu': 'E',
29
+ 'Phe': 'F', 'Gly': 'G', 'His': 'H', 'Ile': 'I',
30
+ 'Lys': 'K', 'Leu': 'L', 'Met': 'M', 'Asn': 'N',
31
+ 'Pro': 'P', 'Gln': 'Q', 'Arg': 'R', 'Ser': 'S',
32
+ 'Thr': 'T', 'Val': 'V', 'Trp': 'W', 'Tyr': 'Y'
33
+ }
34
+
35
+ def is_amino_acid_sequence(self, seq):
36
+ """
37
+ Check if the input is a valid amino acid sequence.
38
+
39
+ Args:
40
+ seq: String to check
41
+
42
+ Returns:
43
+ bool: True if valid amino acid sequence, False otherwise
44
+ """
45
+ if not seq or not isinstance(seq, str):
46
+ return False
47
+
48
+ # Valid amino acid letters (20 standard + some common modifications)
49
+ valid_amino_acids = set('ACDEFGHIKLMNPQRSTVWY')
50
+
51
+ # Check if all characters are valid amino acids
52
+ # Allow for some special characters that might be in the sequence
53
+ seq_clean = seq.strip().upper()
54
+
55
+ # Must have at least 2 amino acids to be a peptide
56
+ if len(seq_clean) < 2:
57
+ return False
58
+
59
+ # Check if all characters are valid amino acids
60
+ return all(c in valid_amino_acids for c in seq_clean)
61
+
62
+ def is_peptide(self, smiles):
63
+ """Check if the SMILES represents a peptide structure"""
64
+ # First check if it's an amino acid sequence (not SMILES)
65
+ if self.is_amino_acid_sequence(smiles):
66
+ return True
67
+
68
+ # Otherwise check if it's a SMILES peptide
69
+ mol = Chem.MolFromSmiles(smiles)
70
+ if mol is None:
71
+ return False
72
+
73
+ # Look for peptide bonds: NC(=O) pattern
74
+ peptide_bond_pattern = Chem.MolFromSmarts('[NH][C](=O)')
75
+ if mol.HasSubstructMatch(peptide_bond_pattern):
76
+ return True
77
+
78
+ # Look for N-methylated peptide bonds: N(C)C(=O) pattern
79
+ n_methyl_pattern = Chem.MolFromSmarts('[N;H0;$(NC)](C)[C](=O)')
80
+ if mol.HasSubstructMatch(n_methyl_pattern):
81
+ return True
82
+
83
+ return False
84
+
85
+ def is_cyclic(self, smiles):
86
+ """Improved cyclic peptide detection"""
87
+ # Check for C-terminal carboxyl
88
+ if smiles.endswith('C(=O)O'):
89
+ return False, [], []
90
+
91
+ # Find all numbers used in ring closures
92
+ ring_numbers = re.findall(r'(?:^|[^c])[0-9](?=[A-Z@\(\)])', smiles)
93
+
94
+ # Find aromatic ring numbers
95
+ aromatic_matches = re.findall(r'c[0-9](?:ccccc|c\[nH\]c)[0-9]', smiles)
96
+ aromatic_cycles = []
97
+ for match in aromatic_matches:
98
+ numbers = re.findall(r'[0-9]', match)
99
+ aromatic_cycles.extend(numbers)
100
+
101
+ # Numbers that aren't part of aromatic rings are peptide cycles
102
+ peptide_cycles = [n for n in ring_numbers if n not in aromatic_cycles]
103
+
104
+ is_cyclic = len(peptide_cycles) > 0 and not smiles.endswith('C(=O)O')
105
+ return is_cyclic, peptide_cycles, aromatic_cycles
106
+
107
+ def split_on_bonds(self, smiles):
108
+ """Split SMILES into segments with simplified Pro handling"""
109
+ positions = []
110
+ used = set()
111
+
112
+ # Find Gly pattern first
113
+ gly_pattern = r'NCC\(=O\)'
114
+ for match in re.finditer(gly_pattern, smiles):
115
+ if not any(p in range(match.start(), match.end()) for p in used):
116
+ positions.append({
117
+ 'start': match.start(),
118
+ 'end': match.end(),
119
+ 'type': 'gly',
120
+ 'pattern': match.group()
121
+ })
122
+ used.update(range(match.start(), match.end()))
123
+
124
+ for pattern, bond_type in self.bond_patterns:
125
+ for match in re.finditer(pattern, smiles):
126
+ if not any(p in range(match.start(), match.end()) for p in used):
127
+ positions.append({
128
+ 'start': match.start(),
129
+ 'end': match.end(),
130
+ 'type': bond_type,
131
+ 'pattern': match.group()
132
+ })
133
+ used.update(range(match.start(), match.end()))
134
+
135
+ # Sort by position
136
+ positions.sort(key=lambda x: x['start'])
137
+
138
+ # Create segments
139
+ segments = []
140
+
141
+ if positions:
142
+ # First segment
143
+ if positions[0]['start'] > 0:
144
+ segments.append({
145
+ 'content': smiles[0:positions[0]['start']],
146
+ 'bond_after': positions[0]['pattern']
147
+ })
148
+
149
+ # Process segments
150
+ for i in range(len(positions)-1):
151
+ current = positions[i]
152
+ next_pos = positions[i+1]
153
+
154
+ if current['type'] == 'gly':
155
+ segments.append({
156
+ 'content': 'NCC(=O)',
157
+ 'bond_before': positions[i-1]['pattern'] if i > 0 else None,
158
+ 'bond_after': next_pos['pattern']
159
+ })
160
+ else:
161
+ content = smiles[current['end']:next_pos['start']]
162
+ if content:
163
+ segments.append({
164
+ 'content': content,
165
+ 'bond_before': current['pattern'],
166
+ 'bond_after': next_pos['pattern']
167
+ })
168
+
169
+ # Last segment
170
+ if positions[-1]['end'] < len(smiles):
171
+ segments.append({
172
+ 'content': smiles[positions[-1]['end']:],
173
+ 'bond_before': positions[-1]['pattern']
174
+ })
175
+
176
+ return segments
177
+
178
+ def clean_terminal_carboxyl(self, segment):
179
+ """Remove C-terminal carboxyl only if it's the true terminus"""
180
+ content = segment['content']
181
+
182
+ # Only clean if:
183
+ # 1. Contains C(=O)O
184
+ # 2. No bond_after exists (meaning it's the last segment)
185
+ # 3. C(=O)O is at the end of the content
186
+ if 'C(=O)O' in content and not segment.get('bond_after'):
187
+ print('recognized?')
188
+ # Remove C(=O)O pattern regardless of position
189
+ cleaned = re.sub(r'\(C\(=O\)O\)', '', content)
190
+ # Remove any leftover empty parentheses
191
+ cleaned = re.sub(r'\(\)', '', cleaned)
192
+ print(cleaned)
193
+ return cleaned
194
+ return content
195
+
196
+ def identify_residue(self, segment):
197
+ """Identify residue with Pro reconstruction"""
198
+ # Only clean terminal carboxyl if this is the last segment
199
+ content = self.clean_terminal_carboxyl(segment)
200
+ mods = self.get_modifications(segment)
201
+
202
+ # UAA pattern matching section - before regular residues
203
+ # Phenylglycine and derivatives
204
+ if 'c1ccccc1' in content:
205
+ if '[C@@H](c1ccccc1)' in content or '[C@H](c1ccccc1)' in content:
206
+ return '4', mods # Base phenylglycine
207
+
208
+ # 4-substituted phenylalanines
209
+ if 'Cc1ccc' in content:
210
+ if 'OMe' in content or 'OCc1ccc' in content:
211
+ return '0A1', mods # 4-methoxy-Phenylalanine
212
+ elif 'Clc1ccc' in content:
213
+ return '200', mods # 4-chloro-Phenylalanine
214
+ elif 'Brc1ccc' in content:
215
+ return '4BF', mods # 4-Bromo-phenylalanine
216
+ elif 'C#Nc1ccc' in content:
217
+ return '4CF', mods # 4-cyano-phenylalanine
218
+ elif 'Ic1ccc' in content:
219
+ return 'PHI', mods # 4-Iodo-phenylalanine
220
+ elif 'Fc1ccc' in content:
221
+ return 'PFF', mods # 4-Fluoro-phenylalanine
222
+
223
+ # Modified tryptophans
224
+ if 'c[nH]c2' in content:
225
+ if 'Oc2cccc2' in content:
226
+ return '0AF', mods # 7-hydroxy-tryptophan
227
+ elif 'Fc2cccc2' in content:
228
+ return '4FW', mods # 4-fluoro-tryptophan
229
+ elif 'Clc2cccc2' in content:
230
+ return '6CW', mods # 6-chloro-tryptophan
231
+ elif 'Brc2cccc2' in content:
232
+ return 'BTR', mods # 6-bromo-tryptophan
233
+ elif 'COc2cccc2' in content:
234
+ return 'MOT5', mods # 5-Methoxy-tryptophan
235
+ elif 'Cc2cccc2' in content:
236
+ return 'MTR5', mods # 5-Methyl-tryptophan
237
+
238
+ # Special amino acids
239
+ if 'CC(C)(C)[C@@H]' in content or 'CC(C)(C)[C@H]' in content:
240
+ return 'BUG', mods # Tertleucine
241
+
242
+ if 'CCCNC(=N)N' in content:
243
+ return 'CIR', mods # Citrulline
244
+
245
+ if '[SeH]' in content:
246
+ return 'CSE', mods # Selenocysteine
247
+
248
+ if '[NH3]CC[C@@H]' in content or '[NH3]CC[C@H]' in content:
249
+ return 'DAB', mods # Diaminobutyric acid
250
+
251
+ if 'C1CCCCC1' in content:
252
+ if 'C1CCCCC1[C@@H]' in content or 'C1CCCCC1[C@H]' in content:
253
+ return 'CHG', mods # Cyclohexylglycine
254
+ elif 'C1CCCCC1C[C@@H]' in content or 'C1CCCCC1C[C@H]' in content:
255
+ return 'ALC', mods # 3-cyclohexyl-alanine
256
+
257
+ # Naphthalene derivatives
258
+ if 'c1cccc2c1cccc2' in content:
259
+ if 'c1cccc2c1cccc2[C@@H]' in content or 'c1cccc2c1cccc2[C@H]' in content:
260
+ return 'NAL', mods # 2-Naphthyl-alanine
261
+
262
+ # Heteroaromatic derivatives
263
+ if 'c1cncc' in content:
264
+ return 'PYR4', mods # 3-(4-Pyridyl)-alanine
265
+ if 'c1cscc' in content:
266
+ return 'THA3', mods # 3-(3-thienyl)-alanine
267
+ if 'c1nnc' in content:
268
+ return 'TRZ4', mods # 3-(1,2,4-Triazol-1-yl)-alanine
269
+
270
+ # Modified serines and threonines
271
+ if 'OP(O)(O)O' in content:
272
+ if '[C@@H](COP' in content or '[C@H](COP' in content:
273
+ return 'SEP', mods # phosphoserine
274
+ elif '[C@@H](OP' in content or '[C@H](OP' in content:
275
+ return 'TPO', mods # phosphothreonine
276
+
277
+ # Specialized ring systems
278
+ if 'c1c2ccccc2cc2c1cccc2' in content:
279
+ return 'ANTH', mods # 3-(9-anthryl)-alanine
280
+ if 'c1csc2c1cccc2' in content:
281
+ return 'BTH3', mods # 3-(3-benzothienyl)-alanine
282
+ if '[C@]12C[C@H]3C[C@@H](C2)C[C@@H](C1)C3' in content:
283
+ return 'ADAM', mods # Adamanthane
284
+
285
+ # Fluorinated derivatives
286
+ if 'FC(F)(F)' in content:
287
+ if 'CC(F)(F)F' in content:
288
+ return 'FLA', mods # Trifluoro-alanine
289
+ if 'C(F)(F)F)c1' in content:
290
+ if 'c1ccccc1C(F)(F)F' in content:
291
+ return 'TFG2', mods # 2-(Trifluoromethyl)-phenylglycine
292
+ if 'c1cccc(c1)C(F)(F)F' in content:
293
+ return 'TFG3', mods # 3-(Trifluoromethyl)-phenylglycine
294
+ if 'c1ccc(cc1)C(F)(F)F' in content:
295
+ return 'TFG4', mods # 4-(Trifluoromethyl)-phenylglycine
296
+
297
+ # Multiple halogen patterns
298
+ if 'F' in content and 'c1' in content:
299
+ if 'c1ccc(c(c1)F)F' in content:
300
+ return 'F2F', mods # 3,4-Difluoro-phenylalanine
301
+ if 'cc(F)cc(c1)F' in content:
302
+ return 'WFP', mods # 3,5-Difluoro-phenylalanine
303
+ if 'Cl' in content and 'c1' in content:
304
+ if 'c1ccc(cc1Cl)Cl' in content:
305
+ return 'CP24', mods # 2,4-dichloro-phenylalanine
306
+ if 'c1ccc(c(c1)Cl)Cl' in content:
307
+ return 'CP34', mods # 3,4-dichloro-phenylalanine
308
+
309
+ # Hydroxy and amino derivatives
310
+ if 'O' in content and 'c1' in content:
311
+ if 'c1cc(O)cc(c1)O' in content:
312
+ return '3FG', mods # (2s)-amino(3,5-dihydroxyphenyl)-ethanoic acid
313
+ if 'c1ccc(c(c1)O)O' in content:
314
+ return 'DAH', mods # 3,4-Dihydroxy-phenylalanine
315
+
316
+ # Cyclic amino acids
317
+ if 'C1CCCC1' in content:
318
+ return 'CPA3', mods # 3-Cyclopentyl-alanine
319
+ if 'C1CCCCC1' in content:
320
+ if 'CC1CCCCC1' in content:
321
+ return 'ALC', mods # 3-cyclohexyl-alanine
322
+ else:
323
+ return 'CHG', mods # Cyclohexylglycine
324
+
325
+ # Chain-length variants
326
+ if 'CCC[C@@H]' in content or 'CCC[C@H]' in content:
327
+ return 'NLE', mods # Norleucine
328
+ if 'CC[C@@H]' in content or 'CC[C@H]' in content:
329
+ if not any(x in content for x in ['CC(C)', 'COC', 'CN(']):
330
+ return 'ABA', mods # 2-Aminobutyric acid
331
+
332
+ # Modified histidines
333
+ if 'c1cnc' in content:
334
+ if '[C@@H]1CN[C@@H](N1)F' in content:
335
+ return '2HF', mods # 2-fluoro-l-histidine
336
+ if 'c1cnc([nH]1)F' in content:
337
+ return '2HF1', mods # 2-fluoro-l-histidine variant
338
+ if 'c1c[nH]c(n1)F' in content:
339
+ return '2HF2', mods # 2-fluoro-l-histidine variant
340
+
341
+ # Sulfur and selenium containing
342
+ if '[SeH]' in content:
343
+ return 'CSE', mods # Selenocysteine
344
+ if 'S' in content:
345
+ if 'CSCc1ccccc1' in content:
346
+ return 'BCS', mods # benzylcysteine
347
+ if 'CCSC' in content:
348
+ return 'ESC', mods # Ethionine
349
+ if 'CCS' in content:
350
+ return 'HCS', mods # homocysteine
351
+
352
+ # Additional modifications
353
+ if 'CN=[N]=N' in content:
354
+ return 'AZDA', mods # azido-alanine
355
+ if '[NH]=[C](=[NH2])=[NH2]' in content:
356
+ if 'CCC[NH]=' in content:
357
+ return 'AGM', mods # 5-methyl-arginine
358
+ if 'CC[NH]=' in content:
359
+ return 'GDPR', mods # 2-Amino-3-guanidinopropionic acid
360
+
361
+ if 'CCON' in content:
362
+ return 'CAN', mods # canaline
363
+ if '[C@@H]1C=C[C@@H](C=C1)' in content:
364
+ return 'ACZ', mods # cis-amiclenomycin
365
+ if 'CCC(=O)[NH3]' in content:
366
+ return 'ONL', mods # 5-oxo-l-norleucine
367
+ if 'c1ccncc1' in content:
368
+ return 'PYR4', mods # 3-(4-Pyridyl)-alanine
369
+ if 'c1ccco1' in content:
370
+ return 'FUA2', mods # (2-furyl)-alanine
371
+
372
+ if 'c1ccc' in content:
373
+ if 'c1ccc(cc1)c1ccccc1' in content:
374
+ return 'BIF', mods # 4,4-biphenylalanine
375
+ if 'c1ccc(cc1)C(=O)c1ccccc1' in content:
376
+ return 'PBF', mods # 4-benzoyl-phenylalanine
377
+ if 'c1ccc(cc1)C(C)(C)C' in content:
378
+ return 'TBP4', mods # 4-tert-butyl-phenylalanine
379
+ if 'c1ccc(cc1)[C](=[NH2])=[NH2]' in content:
380
+ return '0BN', mods # 4-carbamimidoyl-l-phenylalanine
381
+ if 'c1cccc(c1)[C](=[NH2])=[NH2]' in content:
382
+ return 'APM', mods # m-amidinophenyl-3-alanine
383
+
384
+ # Multiple hydroxy patterns
385
+ if 'O' in content:
386
+ if '[C@H]([C@H](C)O)O' in content:
387
+ return 'ILX', mods # 4,5-dihydroxy-isoleucine
388
+ if '[C@H]([C@@H](C)O)O' in content:
389
+ return 'ALO', mods # Allo-threonine
390
+ if '[C@H](COP(O)(O)O)' in content:
391
+ return 'SEP', mods # phosphoserine
392
+ if '[C@H]([C@@H](C)OP(O)(O)O)' in content:
393
+ return 'TPO', mods # phosphothreonine
394
+ if '[C@H](c1ccc(O)cc1)O' in content:
395
+ return 'OMX', mods # (betar)-beta-hydroxy-l-tyrosine
396
+ if '[C@H](c1ccc(c(Cl)c1)O)O' in content:
397
+ return 'OMY', mods # (betar)-3-chloro-beta-hydroxy-l-tyrosine
398
+
399
+ # Heterocyclic patterns
400
+ if 'n1' in content:
401
+ if 'n1cccn1' in content:
402
+ return 'PYZ1', mods # 3-(1-Pyrazolyl)-alanine
403
+ if 'n1nncn1' in content:
404
+ return 'TEZA', mods # 3-(2-Tetrazolyl)-alanine
405
+ if 'c2c(n1)cccc2' in content:
406
+ return 'QU32', mods # 3-(2-Quinolyl)-alanine
407
+ if 'c1cnc2c(c1)cccc2' in content:
408
+ return 'QU33', mods # 3-(3-quinolyl)-alanine
409
+ if 'c1ccnc2c1cccc2' in content:
410
+ return 'QU34', mods # 3-(4-quinolyl)-alanine
411
+ if 'c1ccc2c(c1)nccc2' in content:
412
+ return 'QU35', mods # 3-(5-Quinolyl)-alanine
413
+ if 'c1ccc2c(c1)cncc2' in content:
414
+ return 'QU36', mods # 3-(6-Quinolyl)-alanine
415
+ if 'c1cnc2c(n1)cccc2' in content:
416
+ return 'QX32', mods # 3-(2-quinoxalyl)-alanine
417
+
418
+ # Multiple nitrogen patterns
419
+ if 'N' in content:
420
+ if '[NH3]CC[C@@H]' in content:
421
+ return 'DAB', mods # Diaminobutyric acid
422
+ if '[NH3]C[C@@H]' in content:
423
+ return 'DPP', mods # 2,3-Diaminopropanoic acid
424
+ if '[NH3]CCCCCC[C@@H]' in content:
425
+ return 'HHK', mods # (2s)-2,8-diaminooctanoic acid
426
+ if 'CCC[NH]=[C](=[NH2])=[NH2]' in content:
427
+ return 'GBUT', mods # 2-Amino-4-guanidinobutryric acid
428
+ if '[NH]=[C](=S)=[NH2]' in content:
429
+ return 'THIC', mods # Thio-citrulline
430
+
431
+ # Chain modified amino acids
432
+ if 'CC' in content:
433
+ if 'CCCC[C@@H]' in content:
434
+ return 'AHP', mods # 2-Aminoheptanoic acid
435
+ if 'CCC([C@@H])(C)C' in content:
436
+ return 'I2M', mods # 3-methyl-l-alloisoleucine
437
+ if 'CC[C@H]([C@@H])C' in content:
438
+ return 'IIL', mods # Allo-Isoleucine
439
+ if '[C@H](CCC(C)C)' in content:
440
+ return 'HLEU', mods # Homoleucine
441
+ if '[C@@H]([C@@H](C)O)C' in content:
442
+ return 'HLU', mods # beta-hydroxyleucine
443
+
444
+ # Modified glutamate/aspartate patterns
445
+ if '[C@@H]' in content:
446
+ if '[C@@H](C[C@@H](F))' in content:
447
+ return 'FGA4', mods # 4-Fluoro-glutamic acid
448
+ if '[C@@H](C[C@@H](O))' in content:
449
+ return '3GL', mods # 4-hydroxy-glutamic-acid
450
+ if '[C@@H](C[C@H](C))' in content:
451
+ return 'LME', mods # (3r)-3-methyl-l-glutamic acid
452
+ if '[C@@H](CC[C@H](C))' in content:
453
+ return 'MEG', mods # (3s)-3-methyl-l-glutamic acid
454
+
455
+ # Sulfur and selenium modifications
456
+ if 'S' in content:
457
+ if 'SCC[C@@H]' in content:
458
+ return 'HSER', mods # homoserine
459
+ if 'SCCN' in content:
460
+ return 'SLZ', mods # thialysine
461
+ if 'SC(=O)' in content:
462
+ return 'CSA', mods # s-acetonylcysteine
463
+ if '[S@@](=O)' in content:
464
+ return 'SME', mods # Methionine sulfoxide
465
+ if 'S(=O)(=O)' in content:
466
+ return 'OMT', mods # Methionine sulfone
467
+
468
+ # Double bond containing
469
+ if 'C=' in content:
470
+ if 'C=C[C@@H]' in content:
471
+ return '2AG', mods # 2-Allyl-glycine
472
+ if 'C=C[C@@H]' in content:
473
+ return 'LVG', mods # vinylglycine
474
+ if 'C=Cc1ccccc1' in content:
475
+ return 'STYA', mods # Styrylalanine
476
+
477
+ # Special cases
478
+ if '[C@@H]1Cc2c(C1)cccc2' in content:
479
+ return 'IGL', mods # alpha-amino-2-indanacetic acid
480
+ if '[C](=[C](=O)=O)=O' in content:
481
+ return '26P', mods # 2-amino-6-oxopimelic acid
482
+ if '[C](=[C](=O)=O)=C' in content:
483
+ return '2NP', mods # l-2-amino-6-methylene-pimelic acid
484
+ if 'c2cnc[nH]2' in content:
485
+ return 'HIS', mods # histidine core
486
+ if 'c1cccc2c1cc(O)cc2' in content:
487
+ return 'NAO1', mods # 5-hydroxy-1-naphthalene
488
+ if 'c1ccc2c(c1)cc(O)cc2' in content:
489
+ return 'NAO2', mods # 6-hydroxy-2-naphthalene
490
+
491
+ # Proline (P) - flexible ring numbers
492
+ if any([
493
+ # Check for any ring number in bond patterns
494
+ (segment.get('bond_after', '').startswith(f'N{n}C(=O)') and 'CCC' in content and
495
+ any(f'[C@@H]{n}' in content or f'[C@H]{n}' in content for n in '123456789'))
496
+ for n in '123456789'
497
+ ]) or any([
498
+ # Check ending patterns with any ring number
499
+ (f'CCCN{n}' in content and content.endswith('=O') and
500
+ any(f'[C@@H]{n}' in content or f'[C@H]{n}' in content for n in '123456789'))
501
+ for n in '123456789'
502
+ ]) or any([
503
+ # Handle CCC[C@H]n patterns
504
+ (content == f'CCC[C@H]{n}' and segment.get('bond_before', '').startswith(f'C(=O)N{n}')) or
505
+ (content == f'CCC[C@@H]{n}' and segment.get('bond_before', '').startswith(f'C(=O)N{n}')) or
506
+ # N-terminal Pro with any ring number
507
+ (f'N{n}CCC[C@H]{n}' in content) or
508
+ (f'N{n}CCC[C@@H]{n}' in content)
509
+ for n in '123456789'
510
+ ]):
511
+ return 'Pro', mods
512
+
513
+ # Tryptophan (W) - more specific indole pattern
514
+ if re.search(r'c[0-9]c\[nH\]c[0-9]ccccc[0-9][0-9]', content) and \
515
+ 'c[nH]c' in content.replace(' ', ''):
516
+ return 'Trp', mods
517
+
518
+ # Lysine (K) - both patterns
519
+ if '[C@@H](CCCCN)' in content or '[C@H](CCCCN)' in content:
520
+ return 'Lys', mods
521
+
522
+ # Arginine (R) - both patterns
523
+ if '[C@@H](CCCNC(=N)N)' in content or '[C@H](CCCNC(=N)N)' in content:
524
+ return 'Arg', mods
525
+
526
+ if ('C[C@H](CCCC)' in content or 'C[C@@H](CCCC)' in content) and 'CC(C)' not in content:
527
+ return 'Nle', mods
528
+
529
+ # Ornithine (Orn) - 3-carbon chain with NH2
530
+ if ('C[C@H](CCCN)' in content or 'C[C@@H](CCCN)' in content) and 'CC(C)' not in content:
531
+ return 'Orn', mods
532
+
533
+ # 2-Naphthylalanine (2Nal) - distinct from Phe pattern
534
+ if ('Cc3cc2ccccc2c3' in content) and ('C[C@H]' in content or 'C[C@@H]' in content):
535
+ return '2Nal', mods
536
+
537
+ # Cyclohexylalanine (Cha) - already in your code but moved here for clarity
538
+ if 'N2CCCCC2' in content or 'CCCCC2' in content:
539
+ return 'Cha', mods
540
+
541
+ # Aminobutyric acid (Abu) - 2-carbon chain
542
+ if ('C[C@H](CC)' in content or 'C[C@@H](CC)' in content) and not any(p in content for p in ['CC(C)', 'CCCC', 'CCC(C)']):
543
+ return 'Abu', mods
544
+
545
+ # Pipecolic acid (Pip) - 6-membered ring like Pro
546
+ if ('N3CCCCC3' in content or 'CCCCC3' in content) and ('C[C@H]' in content or 'C[C@@H]' in content):
547
+ return 'Pip', mods
548
+
549
+ # Cyclohexylglycine (Chg) - direct cyclohexyl without CH2
550
+ if ('C[C@H](C1CCCCC1)' in content or 'C[C@@H](C1CCCCC1)' in content):
551
+ return 'Chg', mods
552
+
553
+ # 4-Fluorophenylalanine (4F-Phe)
554
+ if ('Cc2ccc(F)cc2' in content) and ('C[C@H]' in content or 'C[C@@H]' in content):
555
+ return '4F-Phe', mods
556
+
557
+ # Regular residue identification
558
+ if ('NCC(=O)' in content) or (content == 'C'):
559
+ # Middle case - between bonds
560
+ if segment.get('bond_before') and segment.get('bond_after'):
561
+ if ('C(=O)N' in segment['bond_before'] or 'C(=O)N(C)' in segment['bond_before']):
562
+ return 'Gly', mods
563
+ # Terminal case - at the end
564
+ elif segment.get('bond_before') and segment.get('bond_before').startswith('C(=O)N'):
565
+ return 'Gly', mods
566
+
567
+ if 'CC(C)C[C@H]' in content or 'CC(C)C[C@@H]' in content:
568
+ return 'Leu', mods
569
+ if '[C@@H](CC(C)C)' in content or '[C@H](CC(C)C)' in content:
570
+ return 'Leu', mods
571
+
572
+ if '[C@@H]([C@@H](C)O)' in content or '[C@H]([C@H](C)O)' in content:
573
+ return 'Thr', mods
574
+
575
+ if '[C@H](Cc2ccccc2)' in content or '[C@@H](Cc2ccccc2)' in content:
576
+ return 'Phe', mods
577
+
578
+ if ('[C@H](C(C)C)' in content or # With outer parentheses
579
+ '[C@@H](C(C)C)' in content or # With outer parentheses
580
+ '[C@H]C(C)C' in content or # Without outer parentheses
581
+ '[C@@H]C(C)C' in content): # Without outer parentheses
582
+ if not any(p in content for p in ['CC(C)C[C@H]', 'CC(C)C[C@@H]']): # Still check not Leu
583
+ return 'Val', mods
584
+
585
+ if '[C@H](COC(C)(C)C)' in content or '[C@@H](COC(C)(C)C)' in content:
586
+ return 'O-tBu', mods
587
+
588
+ if any([
589
+ 'CC[C@H](C)' in content,
590
+ 'CC[C@@H](C)' in content,
591
+ 'C(C)C[C@H]' in content and 'CC(C)C' not in content,
592
+ 'C(C)C[C@@H]' in content and 'CC(C)C' not in content
593
+ ]):
594
+ return 'Ile', mods
595
+
596
+ if ('[C@H](C)' in content or '[C@@H](C)' in content):
597
+ if not any(p in content for p in ['C(C)C', 'COC', 'CN(', 'C(C)O', 'CC[C@H]', 'CC[C@@H]']):
598
+ return 'Ala', mods
599
+
600
+ # Tyrosine (Tyr) - 4-hydroxybenzyl side chain
601
+ if re.search(r'Cc[0-9]ccc\(O\)cc[0-9]', content):
602
+ return 'Tyr', mods
603
+
604
+
605
+ # Serine (Ser) - Hydroxymethyl side chain
606
+ if '[C@H](CO)' in content or '[C@@H](CO)' in content:
607
+ if not ('C(C)O' in content or 'COC' in content):
608
+ return 'Ser', mods
609
+
610
+ # Threonine (Thr) - 1-hydroxyethyl side chain
611
+ if '[C@@H]([C@@H](C)O)' in content or '[C@H]([C@H](C)O)' in content or '[C@@H](C)O' in content or '[C@H](C)O' in content:
612
+ return 'Thr', mods
613
+
614
+ # Cysteine (Cys) - Thiol side chain
615
+ if '[C@H](CS)' in content or '[C@@H](CS)' in content:
616
+ return 'Cys', mods
617
+
618
+ # Methionine (Met) - Methylthioethyl side chain
619
+ if ('C[C@H](CCSC)' in content or 'C[C@@H](CCSC)' in content):
620
+ return 'Met', mods
621
+
622
+ # Asparagine (Asn) - Carbamoylmethyl side chain
623
+ if ('CC(=O)N' in content) and ('C[C@H]' in content or 'C[C@@H]' in content):
624
+ return 'Asn', mods
625
+
626
+ # Glutamine (Gln) - Carbamoylethyl side chain
627
+ if ('CCC(=O)N' in content) and ('C[C@H]' in content or 'C[C@@H]' in content):
628
+ return 'Gln', mods
629
+
630
+ # Aspartic acid (Asp) - Carboxymethyl side chain
631
+ if ('CC(=O)O' in content) and ('C[C@H]' in content or 'C[C@@H]' in content):
632
+ return 'Asp', mods
633
+
634
+ # Glutamic acid (Glu) - Carboxyethyl side chain
635
+ if ('CCC(=O)O' in content) and ('C[C@H]' in content or 'C[C@@H]' in content):
636
+ return 'Glu', mods
637
+
638
+ # Arginine (Arg) - 3-guanidinopropyl side chain
639
+ if ('CCCNC(=N)N' in content) and ('C[C@H]' in content or 'C[C@@H]' in content):
640
+ return 'Arg', mods
641
+
642
+ # Histidine (His) - Imidazole side chain
643
+ if ('Cc2cnc[nH]2' in content) and ('C[C@H]' in content or 'C[C@@H]' in content):
644
+ return 'His', mods
645
+
646
+ return None, mods
647
+
648
def get_modifications(self, segment):
    """Return backbone modifications implied by the bond after this segment.

    'N-Me' is reported for an N-methylated amide, 'O-linked' for an
    ester bond; an empty list means a plain peptide bond (or no bond).
    """
    bond_after = segment.get('bond_after')
    if not bond_after:
        return []
    found = []
    # N-methylation shows up as an N(C) motif in the trailing bond.
    if 'N(C)' in bond_after or bond_after.startswith('C(=O)N(C)'):
        found.append('N-Me')
    # An OC(=O) motif marks an ester (depsipeptide) linkage.
    if 'OC(=O)' in bond_after:
        found.append('O-linked')
    return found
657
+
658
def analyze_structure(self, smiles):
    """Analyze a peptide SMILES string, printing a per-segment debug trace.

    Splits the SMILES on backbone bonds, identifies each residue (plus
    any modifications such as N-Me), detects cyclization, and prints the
    intermediate results as it goes.

    Args:
        smiles: Peptide SMILES string to analyze.

    Returns:
        tuple: ``(three_letter_sequence, segment_count)`` where the
        sequence is '-'-joined and wrapped in ``cyclo(...)`` when the
        peptide is cyclic.
    """
    # Fix: removed unreachable dead code (a string-literal "return {...}"
    # expression) that previously sat after the return statement.
    print("\nAnalyzing structure:", smiles)

    # Split into segments
    segments = self.split_on_bonds(smiles)

    print("\nSegment Analysis:")
    sequence = []
    for i, segment in enumerate(segments):
        print(f"\nSegment {i}:")
        print(f"Content: {segment['content']}")
        print(f"Bond before: {segment.get('bond_before', 'None')}")
        print(f"Bond after: {segment.get('bond_after', 'None')}")

        residue, mods = self.identify_residue(segment)
        if residue:
            # Render modifications inline, e.g. "Ala(N-Me)".
            if mods:
                sequence.append(f"{residue}({','.join(mods)})")
            else:
                sequence.append(residue)
            print(f"Identified as: {residue}")
            print(f"Modifications: {mods}")
        else:
            print(f"Warning: Could not identify residue in segment: {segment['content']}")

    # Wrap the joined sequence in cyclo(...) when a macrocycle is present.
    is_cyclic, peptide_cycles, aromatic_cycles = self.is_cyclic(smiles)
    three_letter = '-'.join(sequence)
    # Unknown residues map to 'X'; the '(mods)' suffix is stripped first.
    one_letter = ''.join(self.three_to_one.get(aa.split('(')[0], 'X') for aa in sequence)

    if is_cyclic:
        three_letter = f"cyclo({three_letter})"
        one_letter = f"cyclo({one_letter})"

    print(f"\nFinal sequence: {three_letter}")
    print(f"One-letter code: {one_letter}")
    print(f"Is cyclic: {is_cyclic}")

    return three_letter, len(segments)
705
+
706
def return_sequence(self, smiles):
    """Split *smiles* on backbone bonds and return the identified residues.

    Prints a debug trace for each segment and returns a list of
    three-letter residue codes, with modifications appended in
    parentheses (e.g. ``'Ala(N-Me)'``).
    """
    print("\nAnalyzing structure:", smiles)

    # Break the SMILES into per-residue segments.
    segments = self.split_on_bonds(smiles)

    print("\nSegment Analysis:")
    sequence = []
    for idx, seg in enumerate(segments):
        print(f"\nSegment {idx}:")
        print(f"Content: {seg['content']}")
        print(f"Bond before: {seg.get('bond_before', 'None')}")
        print(f"Bond after: {seg.get('bond_after', 'None')}")

        residue, mods = self.identify_residue(seg)
        if not residue:
            print(f"Warning: Could not identify residue in segment: {seg['content']}")
            continue

        label = f"{residue}({','.join(mods)})" if mods else residue
        sequence.append(label)
        print(f"Identified as: {residue}")
        print(f"Modifications: {mods}")

    return sequence
733
+
734
+ """
735
+ def annotate_cyclic_structure(mol, sequence):
736
+ '''Create annotated 2D structure with clear, non-overlapping residue labels'''
737
+ # Generate 2D coordinates
738
+ # Generate 2D coordinates
739
+ AllChem.Compute2DCoords(mol)
740
+
741
+ # Create drawer with larger size for annotations
742
+ drawer = Draw.rdMolDraw2D.MolDraw2DCairo(2000, 2000) # Even larger size
743
+
744
+ # Get residue list and reverse it to match structural representation
745
+ if sequence.startswith('cyclo('):
746
+ residues = sequence[6:-1].split('-')
747
+ else:
748
+ residues = sequence.split('-')
749
+ residues = list(reversed(residues)) # Reverse the sequence
750
+
751
+ # Draw molecule first to get its bounds
752
+ drawer.drawOptions().addAtomIndices = False
753
+ drawer.DrawMolecule(mol)
754
+ drawer.FinishDrawing()
755
+
756
+ # Convert to PIL Image
757
+ img = Image.open(BytesIO(drawer.GetDrawingText()))
758
+ draw = ImageDraw.Draw(img)
759
+
760
+ try:
761
+ # Try to use DejaVuSans as it's commonly available on Linux systems
762
+ font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 60)
763
+ small_font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 60)
764
+ except OSError:
765
+ try:
766
+ # Fallback to Arial if available (common on Windows)
767
+ font = ImageFont.truetype("arial.ttf", 60)
768
+ small_font = ImageFont.truetype("arial.ttf", 60)
769
+ except OSError:
770
+ # If no TrueType fonts are available, fall back to default
771
+ print("Warning: TrueType fonts not available, using default font")
772
+ font = ImageFont.load_default()
773
+ small_font = ImageFont.load_default()
774
+ # Get molecule bounds
775
+ conf = mol.GetConformer()
776
+ positions = []
777
+ for i in range(mol.GetNumAtoms()):
778
+ pos = conf.GetAtomPosition(i)
779
+ positions.append((pos.x, pos.y))
780
+
781
+ x_coords = [p[0] for p in positions]
782
+ y_coords = [p[1] for p in positions]
783
+ min_x, max_x = min(x_coords), max(x_coords)
784
+ min_y, max_y = min(y_coords), max(y_coords)
785
+
786
+ # Calculate scaling factors
787
+ scale = 150 # Increased scale factor
788
+ center_x = 1000 # Image center
789
+ center_y = 1000
790
+
791
+ # Add residue labels in a circular arrangement around the structure
792
+ n_residues = len(residues)
793
+ radius = 700 # Distance of labels from center
794
+
795
+ # Start from the rightmost point (3 o'clock position) and go counterclockwise
796
+ # Offset by -3 positions to align with structure
797
+ offset = 0 # Adjust this value to match the structure alignment
798
+ for i, residue in enumerate(residues):
799
+ # Calculate position in a circle around the structure
800
+ # Start from 0 (3 o'clock) and go counterclockwise
801
+ angle = -(2 * np.pi * ((i + offset) % n_residues) / n_residues)
802
+
803
+ # Calculate label position
804
+ label_x = center_x + radius * np.cos(angle)
805
+ label_y = center_y + radius * np.sin(angle)
806
+
807
+ # Draw residue label
808
+ text = f"{i+1}. {residue}"
809
+ bbox = draw.textbbox((label_x, label_y), text, font=font)
810
+ padding = 10
811
+ draw.rectangle([bbox[0]-padding, bbox[1]-padding,
812
+ bbox[2]+padding, bbox[3]+padding],
813
+ fill='white', outline='white')
814
+ draw.text((label_x, label_y), text,
815
+ font=font, fill='black', anchor="mm")
816
+
817
+ # Add sequence at the top with white background
818
+ seq_text = f"Sequence: {sequence}"
819
+ bbox = draw.textbbox((center_x, 100), seq_text, font=small_font)
820
+ padding = 10
821
+ draw.rectangle([bbox[0]-padding, bbox[1]-padding,
822
+ bbox[2]+padding, bbox[3]+padding],
823
+ fill='white', outline='white')
824
+ draw.text((center_x, 100), seq_text,
825
+ font=small_font, fill='black', anchor="mm")
826
+
827
+ return img
828
+
829
+ """
830
def annotate_cyclic_structure(mol, sequence):
    """Render *mol* as a 2000x2000 2D depiction with a sequence header.

    Returns a PIL Image of the molecule with ``Sequence: ...`` painted
    on a white backing rectangle at the top.
    """
    # Lay the molecule out in 2D and rasterize it with Cairo.
    AllChem.Compute2DCoords(mol)
    canvas = Draw.rdMolDraw2D.MolDraw2DCairo(2000, 2000)
    canvas.drawOptions().addAtomIndices = False
    canvas.DrawMolecule(mol)
    canvas.FinishDrawing()

    image = Image.open(BytesIO(canvas.GetDrawingText()))
    painter = ImageDraw.Draw(image)

    # Font fallback chain: DejaVu (Linux) -> Arial (Windows) -> PIL default.
    header_font = None
    for candidate in ("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
                      "arial.ttf"):
        try:
            header_font = ImageFont.truetype(candidate, 60)
            break
        except OSError:
            continue
    if header_font is None:
        print("Warning: TrueType fonts not available, using default font")
        header_font = ImageFont.load_default()

    # Paint the header over a padded white rectangle so it stays legible.
    seq_text = f"Sequence: {sequence}"
    box = painter.textbbox((1000, 100), seq_text, font=header_font)
    pad = 10
    painter.rectangle([box[0] - pad, box[1] - pad,
                       box[2] + pad, box[3] + pad],
                      fill='white', outline='white')
    painter.text((1000, 100), seq_text,
                 font=header_font, fill='black', anchor="mm")

    return image
866
+
867
def create_enhanced_linear_viz(sequence, smiles):
    """Create an enhanced linear representation using PeptideAnalyzer.

    Builds a two-panel matplotlib figure: the top panel draws the residues
    as boxes joined by bond lines (ester bonds dashed red, peptide bonds
    solid black, N-methylation annotated); the bottom panel lists a textual
    per-segment breakdown. Returns the matplotlib Figure.
    """
    analyzer = PeptideAnalyzer()  # Create analyzer instance

    # Create figure with two subplots (top: overview, bottom: breakdown)
    fig = plt.figure(figsize=(15, 10))
    gs = fig.add_gridspec(2, 1, height_ratios=[1, 2])
    ax_struct = fig.add_subplot(gs[0])
    ax_detail = fig.add_subplot(gs[1])

    # Parse sequence and get residues; strip the cyclo(...) wrapper if present
    if sequence.startswith('cyclo('):
        residues = sequence[6:-1].split('-')
    else:
        residues = sequence.split('-')

    # Get segments using analyzer
    segments = analyzer.split_on_bonds(smiles)

    # Debug print
    print(f"Number of residues: {len(residues)}")
    print(f"Number of segments: {len(segments)}")

    # Top subplot - Basic structure
    ax_struct.set_xlim(0, 10)
    ax_struct.set_ylim(0, 2)

    num_residues = len(residues)
    # Spread residues evenly across 9 units of x; guard the single-residue case
    spacing = 9.0 / (num_residues - 1) if num_residues > 1 else 9.0

    # Draw basic structure
    y_pos = 1.5
    for i in range(num_residues):
        x_pos = 0.5 + i * spacing

        # Draw amino acid box
        rect = patches.Rectangle((x_pos-0.3, y_pos-0.2), 0.6, 0.4,
                                 facecolor='lightblue', edgecolor='black')
        ax_struct.add_patch(rect)

        # Draw connecting bonds if not the last residue
        if i < num_residues - 1:
            # NOTE(review): assumes segments[i] describes the bond following
            # residue i — confirm against split_on_bonds' ordering.
            segment = segments[i] if i < len(segments) else None
            if segment:
                # Determine bond type from segment info
                bond_type = 'ester' if 'O-linked' in segment.get('bond_after', '') else 'peptide'
                is_n_methylated = 'N-Me' in segment.get('bond_after', '')

                bond_color = 'red' if bond_type == 'ester' else 'black'
                linestyle = '--' if bond_type == 'ester' else '-'

                # Draw bond line between the two residue boxes
                ax_struct.plot([x_pos+0.3, x_pos+spacing-0.3], [y_pos, y_pos],
                               color=bond_color, linestyle=linestyle, linewidth=2)

                # Add bond type label above the midpoint of the bond line
                mid_x = x_pos + spacing/2
                bond_label = f"{bond_type}"
                if is_n_methylated:
                    bond_label += "\n(N-Me)"
                ax_struct.text(mid_x, y_pos+0.1, bond_label,
                               ha='center', va='bottom', fontsize=10,
                               color=bond_color)

        # Add residue label below its box
        ax_struct.text(x_pos, y_pos-0.5, residues[i],
                       ha='center', va='top', fontsize=14)

    # Bottom subplot - Detailed breakdown
    ax_detail.set_ylim(0, len(segments)+1)
    ax_detail.set_xlim(0, 1)

    # Create detailed breakdown, one text row per segment, top to bottom
    segment_y = len(segments)  # Start from top
    for i, segment in enumerate(segments):
        y = segment_y - i

        # Check if this is a bond or residue
        residue, mods = analyzer.identify_residue(segment)
        if residue:
            text = f"Residue {i+1}: {residue}"
            if mods:
                text += f" ({', '.join(mods)})"
            color = 'blue'
        else:
            # Must be a bond (no residue could be identified in the segment)
            text = f"Bond {i}: "
            if 'O-linked' in segment.get('bond_after', ''):
                text += "ester"
            elif 'N-Me' in segment.get('bond_after', ''):
                text += "peptide (N-methylated)"
            else:
                text += "peptide"
            color = 'red'

        # Add segment analysis
        ax_detail.text(0.05, y, text, fontsize=12, color=color)
        ax_detail.text(0.5, y, f"SMILES: {segment.get('content', '')}", fontsize=10, color='gray')

    # If cyclic, add connection indicator (double-headed arrow across the top panel)
    if sequence.startswith('cyclo('):
        ax_struct.annotate('', xy=(9.5, y_pos), xytext=(0.5, y_pos),
                           arrowprops=dict(arrowstyle='<->', color='red', lw=2))
        ax_struct.text(5, y_pos+0.3, 'Cyclic Connection',
                       ha='center', color='red', fontsize=14)

    # Add titles and adjust layout
    ax_struct.set_title("Peptide Structure Overview", pad=20)
    ax_detail.set_title("Segment Analysis Breakdown", pad=20)

    # Remove axes
    for ax in [ax_struct, ax_detail]:
        ax.set_xticks([])
        ax.set_yticks([])
        ax.axis('off')

    plt.tight_layout()
    return fig
985
+
986
class PeptideStructureGenerator:
    """A class to generate 3D structures of peptides using different embedding methods."""

    @staticmethod
    def prepare_molecule(smiles):
        """Parse *smiles* into an RDKit Mol with hydrogens added.

        Parses without immediate sanitization, then sanitizes with a
        reduced operation set (chirality cleanup, aromaticity, etc.) so
        unusual peptide structures are tolerated.

        Raises:
            ValueError: if the SMILES cannot be parsed at all.
        """
        mol = Chem.MolFromSmiles(smiles, sanitize=False)
        if mol is None:
            raise ValueError("Failed to create molecule from SMILES")

        # Calculate valence for each atom (non-strict, so odd valences pass)
        for atom in mol.GetAtoms():
            atom.UpdatePropertyCache(strict=False)

        # Sanitize with reduced requirements
        Chem.SanitizeMol(mol,
                         sanitizeOps=Chem.SANITIZE_FINDRADICALS|
                         Chem.SANITIZE_KEKULIZE|
                         Chem.SANITIZE_SETAROMATICITY|
                         Chem.SANITIZE_SETCONJUGATION|
                         Chem.SANITIZE_SETHYBRIDIZATION|
                         Chem.SANITIZE_CLEANUPCHIRALITY)

        mol = Chem.AddHs(mol)
        return mol

    @staticmethod
    def get_etkdg_params(attempt=0):
        """Get ETKDG parameters with optional modifications based on attempt number.

        Later attempts (>10) relax the embedding: experimental torsion
        preferences are disabled and the bond length is gradually stretched.
        """
        params = AllChem.ETKDGv3()
        params.randomSeed = -1  # non-deterministic seed for each attempt
        params.maxIterations = 200
        params.numThreads = 4  # Reduced for web interface
        params.useBasicKnowledge = True
        params.enforceChirality = True
        params.useExpTorsionAnglePrefs = True
        params.useSmallRingTorsions = True
        params.useMacrocycleTorsions = True
        params.ETversion = 2
        params.pruneRmsThresh = -1
        params.embedRmsThresh = 0.5

        if attempt > 10:
            # Progressively loosen geometry constraints for stubborn molecules
            params.bondLength = 1.5 + (attempt - 10) * 0.02
            params.useExpTorsionAnglePrefs = False

        return params

    def generate_structure_etkdg(self, smiles, max_attempts=20):
        """Generate a 3D structure using ETKDG without UFF optimization.

        Retries up to *max_attempts* times with progressively relaxed
        parameters; returns the first successfully embedded Mol.

        Raises:
            ValueError: if no attempt produces an embedding.
        """
        success = False
        mol = None

        for attempt in range(max_attempts):
            try:
                mol = self.prepare_molecule(smiles)
                params = self.get_etkdg_params(attempt)

                # EmbedMolecule returns 0 on success
                if AllChem.EmbedMolecule(mol, params) == 0:
                    success = True
                    break
            except Exception as e:
                # Embedding/parsing failures just trigger another attempt
                continue

        if not success:
            raise ValueError("Failed to generate structure with ETKDG")

        return mol

    def generate_structure_uff(self, smiles, max_attempts=20):
        """Generate a 3D structure using ETKDG followed by UFF optimization.

        Keeps the lowest-UFF-energy conformer found over *max_attempts*
        independent embed+optimize attempts.

        Raises:
            ValueError: if no attempt yields an optimized structure.
        """
        best_mol = None
        lowest_energy = float('inf')

        for attempt in range(max_attempts):
            try:
                test_mol = self.prepare_molecule(smiles)
                params = self.get_etkdg_params(attempt)

                if AllChem.EmbedMolecule(test_mol, params) == 0:
                    # UFFOptimizeMolecule returns 0 when it converged
                    res = AllChem.UFFOptimizeMolecule(test_mol, maxIters=2000,
                                                      vdwThresh=10.0, confId=0,
                                                      ignoreInterfragInteractions=True)

                    if res == 0:
                        ff = AllChem.UFFGetMoleculeForceField(test_mol)
                        if ff:
                            current_energy = ff.CalcEnergy()
                            if current_energy < lowest_energy:
                                lowest_energy = current_energy
                                # Copy so later attempts can't mutate the best one
                                best_mol = Chem.Mol(test_mol)
            except Exception:
                continue

        if best_mol is None:
            raise ValueError("Failed to generate optimized structure")

        return best_mol

    @staticmethod
    def mol_to_sdf_bytes(mol):
        """Convert an RDKit molecule to SDF file content as UTF-8 bytes."""
        # First write to StringIO in text mode
        sio = StringIO()
        writer = Chem.SDWriter(sio)
        writer.write(mol)
        writer.close()

        # Convert the string to bytes
        return sio.getvalue().encode('utf-8')
1096
+
1097
def process_input(smiles_input=None, file_obj=None, show_linear=False,
                  show_segment_details=False, generate_3d=False, use_uff=False):
    """Process input and create visualizations using PeptideAnalyzer.

    Exactly one of *smiles_input* (a single SMILES string) or *file_obj*
    (a file of SMILES, one per line) should be provided.

    Args:
        smiles_input: SMILES string of a peptide to analyze.
        file_obj: File object / path / bytes with one SMILES per line.
        show_linear: Also render the linear bar-style visualization.
        show_segment_details: Include the per-segment debug breakdown in the text.
        generate_3d: Generate ETKDG 3D structures (SDF files in a temp dir).
        use_uff: Additionally generate a UFF-optimized structure.

    Returns:
        tuple: ``(output_text, cyclic_image, linear_image, structure_files)``.
        Fix: the function previously returned 3-tuples from some paths and
        4-tuples from others; every path now returns a 4-tuple so callers
        can unpack a fixed number of values.
    """
    analyzer = PeptideAnalyzer()
    temp_dir = tempfile.mkdtemp() if generate_3d else None
    structure_files = []

    # Handle direct SMILES input
    if smiles_input:
        smiles = smiles_input.strip()

        # First check if it's a peptide using analyzer's method
        if not analyzer.is_peptide(smiles):
            return "Error: Input SMILES does not appear to be a peptide structure.", None, None, None

        try:
            # Create molecule
            mol = Chem.MolFromSmiles(smiles)
            if mol is None:
                return "Error: Invalid SMILES notation.", None, None, None

            # Generate 3D structures if requested
            if generate_3d:
                generator = PeptideStructureGenerator()

                try:
                    # Generate ETKDG structure
                    mol_etkdg = generator.generate_structure_etkdg(smiles)
                    etkdg_path = os.path.join(temp_dir, "structure_etkdg.sdf")
                    writer = Chem.SDWriter(etkdg_path)
                    writer.write(mol_etkdg)
                    writer.close()
                    structure_files.append(etkdg_path)

                    # Generate UFF structure if requested
                    if use_uff:
                        mol_uff = generator.generate_structure_uff(smiles)
                        uff_path = os.path.join(temp_dir, "structure_uff.sdf")
                        writer = Chem.SDWriter(uff_path)
                        writer.write(mol_uff)
                        writer.close()
                        structure_files.append(uff_path)

                except Exception as e:
                    return f"Error generating 3D structures: {str(e)}", None, None, None

            # Use analyzer to get sequence
            segments = analyzer.split_on_bonds(smiles)

            # Process segments and build sequence
            sequence_parts = []
            output_text = ""

            # Only include segment analysis in output if requested
            if show_segment_details:
                output_text += "Segment Analysis:\n"
                for i, segment in enumerate(segments):
                    output_text += f"\nSegment {i}:\n"
                    output_text += f"Content: {segment['content']}\n"
                    output_text += f"Bond before: {segment.get('bond_before', 'None')}\n"
                    output_text += f"Bond after: {segment.get('bond_after', 'None')}\n"

                    residue, mods = analyzer.identify_residue(segment)
                    if residue:
                        if mods:
                            sequence_parts.append(f"{residue}({','.join(mods)})")
                        else:
                            sequence_parts.append(residue)
                        output_text += f"Identified as: {residue}\n"
                        output_text += f"Modifications: {mods}\n"
                    else:
                        output_text += f"Warning: Could not identify residue in segment: {segment['content']}\n"
                output_text += "\n"
            else:
                # Just build sequence without detailed analysis in output
                for segment in segments:
                    residue, mods = analyzer.identify_residue(segment)
                    if residue:
                        if mods:
                            sequence_parts.append(f"{residue}({','.join(mods)})")
                        else:
                            sequence_parts.append(residue)

            # Check if cyclic using analyzer's method
            is_cyclic, peptide_cycles, aromatic_cycles = analyzer.is_cyclic(smiles)
            three_letter = '-'.join(sequence_parts)
            one_letter = ''.join(analyzer.three_to_one.get(aa.split('(')[0], 'X') for aa in sequence_parts)

            if is_cyclic:
                three_letter = f"cyclo({three_letter})"
                one_letter = f"cyclo({one_letter})"

            # Create cyclic structure visualization
            img_cyclic = annotate_cyclic_structure(mol, three_letter)

            # Create linear representation if requested
            img_linear = None
            if show_linear:
                fig_linear = create_enhanced_linear_viz(three_letter, smiles)
                buf = BytesIO()
                fig_linear.savefig(buf, format='png', bbox_inches='tight', dpi=300)
                buf.seek(0)
                img_linear = Image.open(buf)
                plt.close(fig_linear)

            # Add summary to output
            summary = "Summary:\n"
            summary += f"Sequence: {three_letter}\n"
            summary += f"One-letter code: {one_letter}\n"
            summary += f"Is Cyclic: {'Yes' if is_cyclic else 'No'}\n"

            if structure_files:
                summary += "\n3D Structures Generated:\n"
                for filepath in structure_files:
                    summary += f"- {os.path.basename(filepath)}\n"

            return summary + output_text, img_cyclic, img_linear, structure_files if structure_files else None

        except Exception as e:
            return f"Error processing SMILES: {str(e)}", None, None, None

    # Handle file input
    if file_obj is not None:
        try:
            # Handle file content: path-like objects are read from disk,
            # bytes are decoded, anything else is stringified
            if hasattr(file_obj, 'name'):
                with open(file_obj.name, 'r') as f:
                    content = f.read()
            else:
                content = file_obj.decode('utf-8') if isinstance(file_obj, bytes) else str(file_obj)

            output_text = ""
            for line in content.splitlines():
                smiles = line.strip()
                if smiles:
                    # Check if it's a peptide
                    if not analyzer.is_peptide(smiles):
                        output_text += f"Skipping non-peptide SMILES: {smiles}\n"
                        continue

                    # Process this SMILES
                    segments = analyzer.split_on_bonds(smiles)
                    sequence_parts = []

                    # Add segment details if requested
                    if show_segment_details:
                        output_text += f"\nSegment Analysis for SMILES: {smiles}\n"
                        for i, segment in enumerate(segments):
                            output_text += f"\nSegment {i}:\n"
                            output_text += f"Content: {segment['content']}\n"
                            output_text += f"Bond before: {segment.get('bond_before', 'None')}\n"
                            output_text += f"Bond after: {segment.get('bond_after', 'None')}\n"
                            residue, mods = analyzer.identify_residue(segment)
                            if residue:
                                if mods:
                                    sequence_parts.append(f"{residue}({','.join(mods)})")
                                else:
                                    sequence_parts.append(residue)
                                output_text += f"Identified as: {residue}\n"
                                output_text += f"Modifications: {mods}\n"
                    else:
                        for segment in segments:
                            residue, mods = analyzer.identify_residue(segment)
                            if residue:
                                if mods:
                                    sequence_parts.append(f"{residue}({','.join(mods)})")
                                else:
                                    sequence_parts.append(residue)

                    # Get cyclicity and create sequence
                    is_cyclic, peptide_cycles, aromatic_cycles = analyzer.is_cyclic(smiles)
                    sequence = f"cyclo({'-'.join(sequence_parts)})" if is_cyclic else '-'.join(sequence_parts)

                    output_text += f"\nSummary for SMILES: {smiles}\n"
                    output_text += f"Sequence: {sequence}\n"
                    output_text += f"Is Cyclic: {'Yes' if is_cyclic else 'No'}\n"
                    if is_cyclic:
                        output_text += f"Peptide Cycles: {', '.join(peptide_cycles)}\n"
                    output_text += "-" * 50 + "\n"

            return output_text, None, None, None

        except Exception as e:
            return f"Error processing file: {str(e)}", None, None, None

    return "No input provided.", None, None, None
1287
+
utils/timer.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time, torch
2
+ from collections import defaultdict
3
+ from contextlib import contextmanager
4
+
5
class StepTimer:
    """Accumulates wall-clock timings for named code sections.

    When the configured device is CUDA, the GPU is synchronized before
    and after each timed section so queued kernel work is included in
    the measurement.
    """

    def __init__(self, device=None):
        self.times = defaultdict(list)
        self.device = device
        # Synchronize only when the target is a CUDA device (torch.device
        # or a string such as "cuda" / "cuda:0").
        is_cuda_device = isinstance(device, torch.device) and device.type == "cuda"
        is_cuda_string = isinstance(device, str) and "cuda" in device
        self._use_cuda_sync = is_cuda_device or is_cuda_string

    @contextmanager
    def section(self, name):
        """Context manager recording the elapsed time of its body under *name*."""
        if self._use_cuda_sync:
            torch.cuda.synchronize()
        started = time.perf_counter()
        try:
            yield
        finally:
            if self._use_cuda_sync:
                torch.cuda.synchronize()
            self.times[name].append(time.perf_counter() - started)

    def summary(self, top_k=None):
        """Return per-section stats sorted by total time, descending.

        Each row is ``(name, count, total, mean, p50, p95)``; at most
        *top_k* rows are returned when top_k is truthy.
        """
        import numpy as np
        rows = []
        for label, samples in self.times.items():
            arr = np.array(samples, dtype=float)
            rows.append((label, len(arr), arr.sum(), arr.mean(),
                         np.median(arr), np.percentile(arr, 95)))
        rows.sort(key=lambda row: row[2], reverse=True)  # by total time
        return rows[:top_k] if top_k else rows
utils/utils.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Console logger utilities.
2
+
3
+ Copied from https://github.com/HazyResearch/transformers/blob/master/src/utils/utils.py
4
+ Copied from https://docs.python.org/3/howto/logging-cookbook.html#using-a-context-manager-for-selective-logging
5
+ """
6
+
7
+ import logging
8
+ import fsspec
9
+ import lightning
10
+ import torch
11
+ from timm.scheduler import CosineLRScheduler
12
+ import argparse
13
+ import numpy as np
14
+ import random
15
+ import os
16
+
17
def sample_categorical_logits(logits, dtype=torch.float64):
    """Draw one categorical sample per row via the Gumbel-max trick.

    ``logits`` may be unnormalized — no log-softmax is required, since
    adding Gumbel noise and taking the argmax is shift-invariant.
    """
    uniform = torch.rand_like(logits, dtype=dtype)
    # Gumbel(0, 1) noise; the 1e-10 offsets keep both log() calls away from 0.
    gumbel = -torch.log(1e-10 - torch.log(uniform + 1e-10))
    return torch.argmax(logits + gumbel, dim=-1)
21
+
22
def fsspec_exists(filename):
    """Return True if ``filename`` exists on its (possibly remote) filesystem."""
    filesystem, _ = fsspec.core.url_to_fs(filename)
    return filesystem.exists(filename)
26
+
27
+
28
def fsspec_listdir(dirname):
    """List the contents of ``dirname`` via fsspec (local or remote paths)."""
    filesystem, _ = fsspec.core.url_to_fs(dirname)
    return filesystem.ls(dirname)
32
+
33
+
34
def fsspec_mkdirs(dirname, exist_ok=True):
    """Recursively create ``dirname`` via fsspec; existing dirs are OK by default."""
    filesystem, _ = fsspec.core.url_to_fs(dirname)
    filesystem.makedirs(dirname, exist_ok=exist_ok)
38
+
39
+
40
def print_nans(tensor, name):
    """Debug helper: print ``name`` and ``tensor`` when any entry is NaN."""
    has_nan = torch.isnan(tensor).any()
    if has_nan:
        print(name, tensor)
43
+
44
+
45
class CosineDecayWarmupLRScheduler(
    CosineLRScheduler,
    torch.optim.lr_scheduler._LRScheduler):
    """timm ``CosineLRScheduler`` adapted to PyTorch's ``step()``-only interface.

    Lightning always calls ``step()`` (the per-epoch entry point). This
    wrapper keeps its own counter and dispatches to timm's ``step`` or
    ``step_update`` depending on whether the schedule is defined per
    epoch or per optimizer update — otherwise a scheduler configured
    with interval "step" would receive the wrong learning rate.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._last_epoch = -1
        self.step(epoch=0)

    def step(self, epoch=None):
        self._last_epoch = self._last_epoch + 1 if epoch is None else epoch
        if self.t_in_epochs:
            # schedule indexed by epochs
            super().step(epoch=self._last_epoch)
        else:
            # schedule indexed by optimizer updates
            super().step_update(num_updates=self._last_epoch)
70
+
71
+
72
class LoggingContext:
    """Temporarily adjust a logger's level and/or attach a handler.

    On exit the previous level is restored; the handler (if any) is
    removed and, unless ``close=False``, also closed.
    """

    def __init__(self, logger, level=None, handler=None, close=True):
        self.logger = logger
        self.level = level
        self.handler = handler
        self.close = close

    def __enter__(self):
        if self.level is not None:
            # remember the current level so __exit__ can restore it
            self.old_level = self.logger.level
            self.logger.setLevel(self.level)
        if self.handler:
            self.logger.addHandler(self.handler)

    def __exit__(self, exc_type, exc_value, traceback):
        if self.level is not None:
            self.logger.setLevel(self.old_level)
        if self.handler:
            self.logger.removeHandler(self.handler)
            if self.close:
                self.handler.close()
94
+
95
+
96
def get_logger(name=__name__, level=logging.INFO) -> logging.Logger:
    """Initializes multi-GPU-friendly python logger.

    Each logging method is wrapped with Lightning's ``rank_zero_only`` so
    that in multi-GPU runs only rank 0 emits records; otherwise every
    process would duplicate each message.
    """

    logger = logging.getLogger(name)
    logger.setLevel(level)

    # this ensures all logging levels get marked with the rank zero decorator
    # otherwise logs would get multiplied for each GPU process in multi-GPU setup
    # NOTE: the loop variable previously shadowed the ``level`` parameter and
    # clobbered it; use a distinct name.
    for method_name in ('debug', 'info', 'warning', 'error',
                        'exception', 'fatal', 'critical'):
        setattr(logger,
                method_name,
                lightning.pytorch.utilities.rank_zero_only(
                    getattr(logger, method_name)))

    return logger
112
+
113
+
114
def str2bool(v):
    """Parse a flexible boolean CLI argument ("yes"/"no", "t"/"f", "1"/"0", ...).

    Actual ``bool`` values pass through unchanged.

    Raises:
        argparse.ArgumentTypeError: if ``v`` is not a recognized boolean string.
    """
    if isinstance(v, bool):
        return v
    lowered = v.lower()
    if lowered in ('yes', 'true', 't', 'y', '1'):
        return True
    if lowered in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
123
+
124
+
125
def set_seed(seed, use_cuda):
    """Seed the hash, numpy, random, torch, and (optionally) CUDA RNGs."""
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (np.random.seed, random.seed, torch.manual_seed):
        seeder(seed)
    # torch.backends.cudnn.deterministic = True
    if use_cuda:
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)  # covers multi-GPU setups
    print(f'=> Seed of the run set to {seed}')
135
+