adi2075 committed on Feb 21

Commit

4e4ff14

verified ·

1 Parent(s): 5906fbf

Upload 16 files

Browse files

Files changed (17) hide show

.gitattributes +1 -0
analysis.py +84 -0
app.py +188 -0
data/perovskite_dataset.pt +3 -0
generate.py +93 -0
model_weights.pth +3 -0
rdf_analysis.png +0 -0
requirements.txt +4 -0
result_plot.png +3 -0
src/__pycache__/layers.cpython-312.pyc +0 -0
src/__pycache__/model.cpython-312.pyc +0 -0
src/data_loader.py +89 -0
src/layers.py +88 -0
src/model.py +89 -0
train.py +98 -0
validate.py +66 -0
visualize.py +80 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+result_plot.png filter=lfs diff=lfs merge=lfs -text

analysis.py ADDED Viewed

	@@ -0,0 +1,84 @@

+import torch
+import numpy as np
+import matplotlib.pyplot as plt
+from src.model import CrystalDiffusionModel
+# Load your model and generate a crystal
+def generate_crystal():
+    """
+    Denoises random positions into a 5-atom BaTiO3 structure.
+    Loads the trained weights from "model_weights.pth" on the CPU, runs 50
+    damped update steps and returns the final coordinates as a NumPy array
+    of shape (5, 3).
+    """
+    num_atoms = 5
+    model = CrystalDiffusionModel()
+    weights = torch.load("model_weights.pth", map_location='cpu')
+    model.load_state_dict(weights)
+    model.eval()
+    # Atomic numbers: Ba, Ti, O, O, O
+    z = torch.tensor([56, 22, 8, 8, 8])
+    # Fully connected graph: every ordered pair (i, j) with i != j
+    idx = torch.arange(num_atoms)
+    row = idx.repeat_interleave(num_atoms)
+    col = idx.repeat(num_atoms)
+    keep = row != col
+    edge_index = torch.stack((row[keep], col[keep]), dim=0)
+    # Start from Gaussian noise and walk the time value from 1.0 towards 0.0
+    x = torch.randn(num_atoms, 3)
+    steps = 50
+    dt = 1.0 / steps
+    with torch.no_grad():
+        for i in range(steps):
+            t = torch.tensor([[1.0 - i*dt]])
+            pred = model(x, z, t, edge_index)
+            # Move 10% of the way towards the model's prediction
+            x = x + (pred - x) * 0.1
+    return x.numpy()
+def compute_rdf(coords, box_size=5.0, bins=50):
+    """
+    Histogram-based Radial Distribution Function (RDF) estimate.
+    Args:
+        coords: indexable collection of per-atom position vectors that
+            support subtraction (e.g. an (N, 3) NumPy array).
+        box_size: upper edge of the histogram range; distances are binned
+            over [0, box_size].
+        bins: number of histogram bins.
+    Returns:
+        (r, rdf): bin centres and the pair counts divided by the spherical
+        shell volume 4*pi*r^2*dr and by the number of atoms.
+    """
+    num_atoms = len(coords)
+    # Each unordered pair (a, b) with a < b contributes one distance
+    pair_dists = [
+        np.linalg.norm(coords[a] - coords[b])
+        for a in range(num_atoms)
+        for b in range(a + 1, num_atoms)
+    ]
+    counts, edges = np.histogram(pair_dists, bins=bins, range=(0, box_size))
+    centers = 0.5 * (edges[:-1] + edges[1:])
+    # Shell-volume correction so far-away bins are not over-weighted
+    width = edges[1] - edges[0]
+    shell = 4 * np.pi * centers**2 * width
+    return centers, counts / (shell * num_atoms)
+def plot_comparison():
+    """
+    Plots the RDF of 5 random Gaussian points against the RDF of a
+    generated crystal and writes the figure to "rdf_analysis.png".
+    """
+    print("Generating Analysis Plot...")
+    # Baseline curve first, then the model sample (keeps the RNG call order)
+    baseline = compute_rdf(np.random.randn(5, 3))
+    generated = compute_rdf(generate_crystal())
+    plt.figure(figsize=(10, 6))
+    plt.plot(*baseline, label='Random Noise', linestyle='--', color='gray')
+    plt.plot(*generated, label='Generated Crystal (AI)', linewidth=3, color='blue')
+    plt.title("Radial Distribution Function (RDF) Analysis")
+    plt.xlabel("Distance (Angstroms)")
+    plt.ylabel("Probability Density")
+    plt.legend()
+    plt.grid(True, alpha=0.3)
+    plt.savefig("rdf_analysis.png")
+    print("✅ Saved 'rdf_analysis.png'. Put this in your README!")
+if __name__ == "__main__":
+    plot_comparison()

app.py ADDED Viewed

	@@ -0,0 +1,188 @@

+import streamlit as st
+import torch
+import numpy as np
+import py3Dmol
+from stmol import showmol
+from src.model import CrystalDiffusionModel
+# --- PAGE CONFIGURATION ---
+st.set_page_config(
+    page_title="CrystalDiff: AI Material Designer",
+    layout="wide",
+    page_icon="💎",
+    initial_sidebar_state="expanded"
+)
+# --- SIDEBAR: CONTROLS & INFO ---
+# The three widget values read here (target_atom, steps, noise_scale) drive
+# the generation code further down the script.
+with st.sidebar:
+    st.title("💎 CrystalDiff Controls")
+    st.markdown("### 1. Select Chemistry")
+    # index=1 preselects the second option, "Sr (Strontium)"
+    target_atom = st.selectbox(
+        "Choose A-Site Cation",
+        ["Ca (Calcium)", "Sr (Strontium)", "Ba (Barium)", "Pb (Lead)"],
+        index=1,
+        help="The large atom in the center of the cage."
+    )
+    st.markdown("### 2. Diffusion Settings")
+    # steps: number of denoising iterations; noise_scale: multiplier applied
+    # to the initial random positions
+    steps = st.slider("Denoising Steps", 10, 100, 50, help="More steps = higher quality, but slower.")
+    noise_scale = st.slider("Initial Chaos (Noise)", 0.5, 2.0, 1.0, help="Higher noise means the AI has to be more creative.")
+    st.divider()
+    st.markdown("### 🧠 How it Works")
+    st.info("""
+    **Generative Diffusion:**
+    The model starts with random noise (chaos) and iteratively subtracts noise to find a stable crystal structure.
+    **E(n)-Equivariance:**
+    The AI uses a custom Graph Neural Network that respects the laws of physics (rotational symmetry).
+    """)
+    st.markdown("---")
+    st.caption("Built with PyTorch & Streamlit by Aditya Mangal. Inspired by DeepMind's work on generative models for materials science.")
+# --- MAIN PAGE ---
+st.title("💎 CrystalDiff: Generative Material Design")
+st.markdown("""
+This application uses **Geometric Deep Learning** to hallucinate new stable crystals.
+It was trained on the **Materials Project** database to understand the chemical rules of **Perovskite Oxides ($ABO_3$)**.
+""")
+# Map selection to Atomic Number
+# Keys must match the selectbox option strings exactly, otherwise the lookup
+# below raises KeyError.
+atom_map = {
+    "Ca (Calcium)": 20, "Sr (Strontium)": 38,
+    "Ba (Barium)": 56, "Pb (Lead)": 82
+}
+# Atomic number of the chosen A-site cation (first entry of the z tensor)
+selected_z = atom_map[target_atom]
+# Element symbol is the text before the first space, e.g. "Sr" -> "SrTiO₃"
+formula_display = f"{target_atom.split()[0]}TiO₃"
+# --- HELPER FUNCTIONS ---
+@st.cache_resource
+def load_model():
+    """
+    Builds the diffusion model on the CPU and loads "model_weights.pth".
+    Returns (model, device) with the model in eval mode, or (None, None)
+    when the weights file does not exist. Cached by Streamlit so the
+    weights are not reloaded on every rerun.
+    """
+    device = torch.device("cpu")
+    model = CrystalDiffusionModel()
+    try:
+        weights = torch.load("model_weights.pth", map_location=device)
+    except FileNotFoundError:
+        # Caller shows an error message and stops the script
+        return None, None
+    model.load_state_dict(weights)
+    model.eval()
+    return model, device
+def calculate_metrics(pos, z):
+    """
+    Mean distance from the first Ti atom to every O atom.
+    Args:
+        pos: per-atom positions, indexable by atom index.
+        z: per-atom atomic numbers (22 marks Ti, 8 marks O).
+    Returns:
+        The average Ti-O distance, or 0.0 when no Ti or no O is present.
+    """
+    ti_sites = [k for k, number in enumerate(z) if number == 22]
+    o_sites = [k for k, number in enumerate(z) if number == 8]
+    if not (ti_sites and o_sites):
+        return 0.0
+    # Only the first Ti atom is used as the reference point
+    center = pos[ti_sites[0]]
+    return np.mean([np.linalg.norm(center - pos[k]) for k in o_sites])
+def make_view(pos, z):
+    """
+    Creates a 3D molecule view of the structure.
+    Args:
+        pos: (N, 3) array of atom positions.
+        z: length-N sequence of atomic numbers.
+    Returns:
+        A py3Dmol view containing the atoms as an XYZ model.
+    """
+    # Labels come from the atomic numbers stored with the structure, not from
+    # the live selectbox: a structure kept in session state must keep its own
+    # A-site symbol even after the user picks a different cation.
+    symbols = {8: "O", 22: "Ti", 20: "Ca", 38: "Sr", 56: "Ba", 82: "Pb"}
+    # Unknown atomic numbers fall back to the current selection, as before
+    fallback = target_atom.split()[0]
+    lines = [f"{len(pos)}", "Generated"]
+    for i in range(len(pos)):
+        elem = symbols.get(int(z[i]), fallback)
+        lines.append(f"{elem} {pos[i,0]:.4f} {pos[i,1]:.4f} {pos[i,2]:.4f}")
+    xyz_str = "\n".join(lines) + "\n"
+    view = py3Dmol.view(width=800, height=500)
+    view.addModel(xyz_str, "xyz")
+    # Style: spheres for atoms, sticks for bonds
+    view.setStyle({'sphere': {'scale': 0.25}, 'stick': {'radius': 0.1}})
+    view.zoomTo()
+    return view
+# --- APP LOGIC ---
+# load_model() returns (None, None) when the weights file is missing.
+model, device = load_model()
+if model is None:
+    st.error(" Model weights not found! Please run 'train.py' first.")
+    st.stop()
+# Layout: Two columns
+# The [1, 2] spec makes the right-hand (visualization) column twice as wide.
+col1, col2 = st.columns([1, 2])
+with col1:
+    st.subheader("🧪 Experiment Setup")
+    st.write(f"**Target Material:** {formula_display}")
+    st.write(f"**Structure Family:** Cubic Perovskite")
+    if st.button("✨ Generate Crystal", type="primary", use_container_width=True):
+        # 1. Setup Data
+        z = torch.tensor([selected_z, 22, 8, 8, 8], device=device) # A-Site, Ti, O, O, O
+        num_atoms = 5
+        # Graph connections
+        # All ordered pairs (i, j) with i != j, i.e. a fully connected graph
+        # without self-loops.
+        row = torch.repeat_interleave(torch.arange(num_atoms), num_atoms)
+        col = torch.arange(num_atoms).repeat(num_atoms)
+        mask = row != col
+        edge_index = torch.stack([row[mask], col[mask]], dim=0).to(device)
+        # 2. Diffusion Loop
+        # Initial positions: Gaussian noise scaled by the sidebar slider.
+        x = torch.randn(num_atoms, 3, device=device) * noise_scale
+        progress_bar = st.progress(0)
+        status = st.empty()
+        dt = 1.0 / steps
+        for i in range(steps):
+            # Time value fed to the model decreases from 1.0 in steps of dt
+            t_val = 1.0 - (i * dt)
+            t_tensor = torch.tensor([[t_val]], device=device)
+            with torch.no_grad():
+                x_pred = model(x, z, t_tensor, edge_index)
+            # Euler update
+            # Moves 10% of the way from the current positions to the prediction
+            x = x + (x_pred - x) * 0.1
+            # Refresh the UI only every 5th step
+            if i % 5 == 0:
+                progress_bar.progress(i / steps)
+                status.text(f"Denoising... Step {i}/{steps}")
+        progress_bar.progress(1.0)
+        status.success("Done!")
+        # 3. Store result in session state to keep it on screen
+        st.session_state['generated_pos'] = x.numpy()
+        st.session_state['generated_z'] = z.numpy()
+with col2:
+    st.subheader("⚛️ 3D Visualization")
+    # Shows the most recently stored structure, which may come from an
+    # earlier button press with different sidebar settings.
+    if 'generated_pos' in st.session_state:
+        pos = st.session_state['generated_pos']
+        z = st.session_state['generated_z']
+        # Calculate Physics
+        avg_bond = calculate_metrics(pos, z)
+        # Display Metrics
+        m1, m2 = st.columns(2)
+        m1.metric("Avg Ti-O Bond Length", f"{avg_bond:.3f} Å")
+        # Validation Logic
+        # Accept only an average Ti-O distance strictly between 1.8 and 2.2 Å
+        if 1.8 < avg_bond < 2.2:
+            m2.success("✅ Physically Valid")
+        else:
+            m2.warning("⚠️ Unstable Structure")
+        # Render 3D
+        view = make_view(pos, z)
+        showmol(view, height=500, width=800)
+    else:
+        st.info("👈 Select your chemistry on the left and click 'Generate Crystal' to start the AI.")
+        # Placeholder card shown until the first structure is generated
+        st.markdown("""
+        <div style="text-align: center; padding: 50px; border: 2px dashed #444; border-radius: 10px; margin-top: 20px;">
+            <h1 style="color: #666;">🧊</h1>
+            <p style="color: #888;">Waiting for generation...</p>
+        </div>
+        """, unsafe_allow_html=True)

data/perovskite_dataset.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9a29272d987115656b20c250170a62efa7d76142ad58a8d85da6ceccedd797db
+size 31765

generate.py ADDED Viewed

	@@ -0,0 +1,93 @@

+import torch
+import numpy as np
+from src.model import CrystalDiffusionModel
+# --- CONFIGURATION ---
+# Select the atoms you want to generate!
+# Example: BaTiO3 (Barium Titanate) -> Ba=56, Ti=22, O=8
+# Example: SrTiO3 (Strontium Titanate) -> Sr=38, Ti=22, O=8
+# Example: CaTiO3 (Calcium Titanate) -> Ca=20, Ti=22, O=8
+# Let's try generating Strontium Titanate (SrTiO3) this time
+TARGET_ATOMS = [38, 22, 8, 8, 8]  # Sr, Ti, O, O, O
+MODEL_PATH = "model_weights.pth"
+STEPS = 50  # Number of diffusion steps
+def save_xyz(pos, z, filename):
+    """
+    Saves the crystal in XYZ format for visualization.
+    Args:
+        pos: (N, 3) positions, indexable as pos[i, k].
+        z: length-N atomic numbers; each entry must be convertible with int().
+        filename: path of the file to (over)write.
+    The file holds the atom count, a comment line and one
+    "<symbol> x y z" line per atom with 4 decimal places. Atomic numbers
+    missing from the lookup table are written as "X".
+    """
+    # Simple periodic table lookup for common perovskite elements.
+    # Built once per call instead of once per atom.
+    elem_map = {
+        8: "O", 22: "Ti", 20: "Ca",
+        56: "Ba", 38: "Sr", 82: "Pb",
+        26: "Fe", 40: "Zr"
+    }
+    rows = [f"{len(pos)}\n", "Generated by CrystalDiff\n"]
+    for i in range(len(pos)):
+        atom_symbol = elem_map.get(int(z[i]), "X") # Default to X if unknown
+        rows.append(f"{atom_symbol} {pos[i,0]:.4f} {pos[i,1]:.4f} {pos[i,2]:.4f}\n")
+    # Format everything first so a bad input never leaves a half-written file
+    with open(filename, "w") as f:
+        f.writelines(rows)
+def generate():
+    """
+    Generates one crystal for TARGET_ATOMS and writes XYZ snapshots.
+    Files written to the working directory:
+        gen_step_00.xyz   the untouched initial noise
+        gen_step_NN.xyz   the state after every 10th completed step
+        gen_final.xyz     the state after the last step
+    Returns None; prints an error and returns early when MODEL_PATH is missing.
+    """
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"--- 💎 Generating Crystal on {device} ---")
+    # 1. Load Model
+    model = CrystalDiffusionModel().to(device)
+    try:
+        model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
+    except FileNotFoundError:
+        print(f"❌ Error: Could not find '{MODEL_PATH}'. Did you run train.py?")
+        return
+    model.eval()
+    # 2. Setup Target Chemistry
+    z = torch.tensor(TARGET_ATOMS, device=device)
+    num_atoms = len(z)
+    print(f"Target Atoms: {z.tolist()}")
+    # Create fully connected graph (all ordered pairs without self-loops)
+    row = torch.repeat_interleave(torch.arange(num_atoms), num_atoms)
+    col = torch.arange(num_atoms).repeat(num_atoms)
+    mask = row != col
+    edge_index = torch.stack([row[mask], col[mask]], dim=0).to(device)
+    # 3. Start with Pure Noise (The "Chaos")
+    x = torch.randn(num_atoms, 3, device=device)
+    print("Initial State: Random Gas Cloud")
+    save_xyz(x, z, "gen_step_00.xyz")
+    # 4. The Reverse Diffusion Loop
+    dt = 1.0 / STEPS
+    for i in range(STEPS):
+        # Time goes from 1.0 -> 0.0
+        t_val = 1.0 - (i * dt)
+        t_tensor = torch.tensor([[t_val]], device=device)
+        with torch.no_grad():
+            # Predict where the atoms SHOULD be
+            x_pred = model(x, z, t_tensor, edge_index)
+        # Move 10% towards the prediction at each step for stability
+        x = x + (x_pred - x) * 0.1
+        # Snapshots are numbered by completed steps. Saving at i == 0 would
+        # overwrite gen_step_00.xyz, which must keep the pure-noise state
+        # because visualize.py plots it as "Step 0: Random Noise".
+        done = i + 1
+        if done % 10 == 0:
+            print(f"Step {done}/{STEPS}: Denoising...")
+            save_xyz(x, z, f"gen_step_{done:02d}.xyz")
+    # 5. Final Save
+    print("✅ Final Structure Generated!")
+    save_xyz(x, z, "gen_final.xyz")
+    print("Check 'gen_final.xyz' to see your crystal.")
+if __name__ == "__main__":
+    generate()

model_weights.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1507e932ff678dd90a144ff2b5e5f280870f528ee9769e8a9303d8a56f226099
+size 355888

rdf_analysis.png ADDED Viewed

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+mp-api
+python-dotenv
+streamlit
+py3Dmol

result_plot.png ADDED Viewed

Git LFS Details

SHA256: 0ab1f0d27d26aebee590e099bf021c08bd86887432cd450d2ff8bf9de3f64463
Pointer size: 131 Bytes
Size of remote file: 774 kB

src/__pycache__/layers.cpython-312.pyc ADDED Viewed

Binary file (3.25 kB). View file

src/__pycache__/model.cpython-312.pyc ADDED Viewed

Binary file (3.81 kB). View file

src/data_loader.py ADDED Viewed

	@@ -0,0 +1,89 @@

+import torch
+from mp_api.client import MPRester
+import os
+from pathlib import Path
+from dotenv import load_dotenv
+from tqdm import tqdm
+# --- CONFIGURATION ---
+load_dotenv()  # Load .env file for API keys
+API_KEY = os.getenv("MPI_API_KEY")
+# Save path: project root `data/` folder
+repo_root = Path(__file__).resolve().parents[1]
+SAVE_PATH = repo_root / "data" / "perovskite_dataset.pt"
+def fetch_data(limit=2000):
+    """
+    Downloads stable 5-site structures from the Materials Project, keeps
+    those whose formula contains "O3", and saves them to SAVE_PATH.
+    Args:
+        limit: maximum number of crystals to keep.
+    Each saved entry is a dict with keys "id", "formula", "z" (long tensor
+    of atomic numbers) and "pos" (float32 coordinates shifted so their
+    unweighted mean is at the origin).
+    """
+    print("Connecting to Materials Project...")
+    with MPRester(API_KEY) as mpr:
+        # nsites=5 matches the ABO3 unit cell size (1 + 1 + 3 atoms)
+        docs = mpr.materials.summary.search(
+            is_stable=True,
+            nsites=5,
+            fields=["structure", "material_id", "formula_pretty"]
+        )
+    print(f"Found {len(docs)} stable 5-atom crystals. Processing...")
+    dataset = []
+    for doc in tqdm(docs):
+        if len(dataset) >= limit:
+            break
+        structure = doc.structure
+        formula = doc.formula_pretty
+        # Heuristic perovskite filter: drop 5-atom cells without "O3"
+        if "O3" not in formula:
+            continue
+        z_tensor = torch.tensor([site.specie.number for site in structure], dtype=torch.long)
+        r_tensor = torch.tensor([site.coords for site in structure], dtype=torch.float32)
+        # Centre the cell on the origin so the model never has to learn
+        # absolute positions
+        r_tensor = r_tensor - r_tensor.mean(dim=0, keepdim=True)
+        dataset.append({
+            "id": str(doc.material_id),
+            "formula": formula,
+            "z": z_tensor,
+            "pos": r_tensor
+        })
+    # Make sure the target directory exists before writing
+    SAVE_PATH.parent.mkdir(parents=True, exist_ok=True)
+    torch.save(dataset, SAVE_PATH)
+    print(f"✅ Successfully saved {len(dataset)} crystals to {SAVE_PATH}")
+    print("   (Filtered for 5-atom unit cells containing 'O3')")
+if __name__ == "__main__":
+    fetch_data(limit=2000)

src/layers.py ADDED Viewed

	@@ -0,0 +1,88 @@

+import torch
+import torch.nn as nn
+class EGNNLayer(nn.Module):
+    """
+    Equivariant GNN layer.
+    Updates node features (h) and coordinates (x). Messages depend on the
+    coordinates only through squared pairwise distances, and coordinates are
+    moved only along difference vectors (x_i - x_j), so rotating or
+    translating the input rotates or translates the output the same way.
+    Args:
+        c_in: width of the incoming node features.
+        c_out: width of the messages and of the outgoing node features.
+    """
+    def __init__(self, c_in, c_out):
+        super().__init__()
+        # Edge MLP: message from [h_i, h_j, squared distance]
+        self.edge_mlp = nn.Sequential(
+            nn.Linear(c_in * 2 + 1, c_out),
+            nn.SiLU(),
+            nn.Linear(c_out, c_out),
+            nn.SiLU()
+        )
+        # Node MLP: new features from [h_i, aggregated messages]
+        self.node_mlp = nn.Sequential(
+            nn.Linear(c_in + c_out, c_out),
+            nn.SiLU(),
+            nn.Linear(c_out, c_out)
+        )
+        # Coord MLP: one scalar weight per edge; Tanh bounds it to (-1, 1)
+        self.coord_mlp = nn.Sequential(
+            nn.Linear(c_out, 1),
+            nn.Tanh()
+        )
+    def forward(self, h, x, edge_index):
+        """
+        Args:
+            h: Node features (N, c_in)
+            x: Coordinates (N, 3)
+            edge_index: Edge list (2, E); messages are summed into the node
+                given in the first row.
+        Returns:
+            (h_new, x_new) with shapes (N, c_out) and (N, 3).
+        """
+        row, col = edge_index
+        # Step 1: per-edge endpoints and squared distance (rotation invariant)
+        x_i = x[row] # (E, 3)
+        x_j = x[col] # (E, 3)
+        diff = x_i - x_j
+        dist_sq = torch.sum(diff**2, dim=-1, keepdim=True) # (E, 1)
+        # Step 2: edge messages from both endpoint features and the distance
+        edge_input = torch.cat([h[row], h[col], dist_sq], dim=-1) # (E, c_in*2 + 1)
+        m_ij = self.edge_mlp(edge_input)
+        # Step 3: coordinate update (the equivariant part):
+        # x_new = x + sum over edges of (x_i - x_j) * weight
+        trans = diff * self.coord_mlp(m_ij)
+        idx_exp = row.unsqueeze(-1).expand(-1, x.size(-1))  # (E, 3)
+        x_agg = torch.zeros_like(x).scatter_add_(0, idx_exp, trans)
+        x_new = x + x_agg
+        # Step 4: node feature update.
+        # new_zeros inherits dtype and device from the messages; a plain
+        # torch.zeros is always default-dtype and makes scatter_add_ fail
+        # when the model runs in float64 or half precision.
+        m_idx_exp = row.unsqueeze(-1).expand(-1, m_ij.size(-1))
+        m_agg = m_ij.new_zeros(h.shape[0], m_ij.shape[1])
+        m_agg = m_agg.scatter_add_(0, m_idx_exp, m_ij)
+        # Combine old features with the aggregated messages
+        h_input = torch.cat([h, m_agg], dim=-1)
+        h_new = self.node_mlp(h_input)
+        return h_new, x_new

src/model.py ADDED Viewed

	@@ -0,0 +1,89 @@

+import torch
+import torch.nn as nn
+from src.layers import EGNNLayer
+class TimeEmbedding(nn.Module):
+    """
+    Two-layer MLP that lifts a scalar time value into a `dim`-wide vector,
+    giving the network a representation of the current noise level.
+    """
+    def __init__(self, dim: int):
+        super().__init__()
+        self.dim = dim
+        self.linear_1 = nn.Linear(1, dim)
+        self.linear_2 = nn.Linear(dim, dim)
+        self.act = nn.SiLU()
+    def forward(self, t: torch.Tensor) -> torch.Tensor:
+        """
+        Args:
+            t (Tensor): Time scalars of shape (Batch_Size, 1).
+        Returns:
+            Tensor: Time embeddings of shape (Batch_Size, dim).
+        """
+        hidden = self.linear_1(t)
+        hidden = self.act(hidden)
+        return self.linear_2(hidden)
+class CrystalDiffusionModel(nn.Module):
+    """
+    E(n)-equivariant denoiser for crystal generation.
+    Embeds atom types and the time value, then lets a stack of EGNN layers
+    refine the node features and the coordinates. The coordinates produced
+    by the last layer are the prediction, so no separate output head exists.
+    """
+    def __init__(self, hidden_dim: int = 64, num_layers: int = 3, max_atom_type: int = 100):
+        super().__init__()
+        # Atomic number -> dense vector; valid indices are 0 .. max_atom_type - 1
+        self.atom_embed = nn.Embedding(max_atom_type, hidden_dim)
+        # Scalar time -> dense vector
+        self.time_embed = TimeEmbedding(hidden_dim)
+        # Backbone: equal-width equivariant layers
+        blocks = [EGNNLayer(c_in=hidden_dim, c_out=hidden_dim) for _ in range(num_layers)]
+        self.layers = nn.ModuleList(blocks)
+    def forward(self, x: torch.Tensor, z: torch.Tensor, t: torch.Tensor, edge_index: torch.Tensor) -> torch.Tensor:
+        """
+        Args:
+            x (Tensor): Noisy atom positions. Shape (N, 3).
+            z (Tensor): Atomic numbers. Shape (N,).
+            t (Tensor): Time step/Noise level. Shape (Batch_Size, 1).
+            edge_index (Tensor): Graph connectivity. Shape (2, E).
+        Returns:
+            Tensor: Denoised atom positions. Shape (N, 3).
+        """
+        # The time embedding is averaged over its batch axis and added to
+        # every atom, so all atoms share one conditioning vector.
+        node_feats = self.atom_embed(z) + self.time_embed(t).mean(dim=0, keepdim=True)
+        coords = x
+        for block in self.layers:
+            node_feats, coords = block(node_feats, coords, edge_index)
+        return coords

train.py ADDED Viewed

	@@ -0,0 +1,98 @@

+import torch
+import torch.optim as optim
+import random
+import os
+from src.model import CrystalDiffusionModel
+# --- CONFIGURATION ---
+# Check your data folder to ensure filename matches exactly
+DATA_PATH = "data/perovskite_dataset.pt"
+EPOCHS = 3000
+LEARNING_RATE = 1e-3
+def load_dataset():
+    """
+    Loads the training set stored at DATA_PATH.
+    Returns the object saved there; its length is reported on stdout.
+    Raises FileNotFoundError when DATA_PATH does not exist.
+    """
+    if os.path.exists(DATA_PATH):
+        data = torch.load(DATA_PATH)
+        print(f"✅ Loaded {len(data)} crystals for training.")
+        return data
+    raise FileNotFoundError(f"❌ Could not find dataset at {DATA_PATH}. Check spelling!")
+def get_random_batch(dataset, device):
+    """
+    Draws one crystal uniformly at random and prepares it for the model.
+    Args:
+        dataset: sequence of dicts with tensor entries "z" and "pos".
+        device: torch device the returned tensors are moved to.
+    Returns:
+        (x_real, z, edge_index): float positions, long atomic numbers, and a
+        (2, E) edge list holding every ordered atom pair except self-pairs.
+    """
+    sample = random.choice(dataset)
+    x_real = sample["pos"].to(device).float()
+    z = sample["z"].to(device).long()
+    # Built per sample because crystals may differ in size. The off-diagonal
+    # entries of an N x N grid, read row by row, are the pairs (i, j), i != j.
+    num_atoms = z.shape[0]
+    off_diagonal = ~torch.eye(num_atoms, dtype=torch.bool)
+    edge_index = off_diagonal.nonzero().t().contiguous().to(device)
+    return x_real, z, edge_index
+def train():
+    """
+    Trains the denoiser and saves its weights to "model_weights.pth".
+    Each of the EPOCHS iterations uses a single randomly drawn crystal:
+    Gaussian noise scaled by a random t in [0, 1) is added to the true
+    positions and the model is trained with an MSE loss to recover them.
+    """
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"--- 🚀 Training on {device} ---")
+    dataset = load_dataset()
+    model = CrystalDiffusionModel().to(device)
+    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
+    model.train()
+    print(f"--- Starting Training Loop ({EPOCHS} Epochs) ---")
+    for epoch in range(1, EPOCHS + 1):
+        optimizer.zero_grad()
+        clean_pos, z, edge_index = get_random_batch(dataset, device)
+        # One noise level per step, shared by every atom of the sample
+        t = torch.rand(1, 1, device=device)
+        corrupted = clean_pos + (torch.randn_like(clean_pos) * t)
+        denoised = model(corrupted, z, t, edge_index)
+        # Mean squared error against the clean positions
+        loss = (denoised - clean_pos).pow(2).mean()
+        loss.backward()
+        optimizer.step()
+        if epoch % 200 == 0:
+            print(f"Epoch {epoch} | Loss: {loss.item():.6f}")
+    torch.save(model.state_dict(), "model_weights.pth")
+    print("✅ Training Complete. Model saved to model_weights.pth!")
+if __name__ == "__main__":
+    train()

validate.py ADDED Viewed

	@@ -0,0 +1,66 @@

+import numpy as np
+def read_xyz(filename):
+    """
+    Reads an XYZ file.
+    Args:
+        filename: path of the XYZ file.
+    Returns:
+        (atoms, coords): list of element symbols and a parallel list of
+        length-3 NumPy arrays.
+    The two header lines (atom count, comment) are skipped. Blank lines,
+    such as a trailing newline at the end of the file, are ignored instead
+    of raising IndexError.
+    """
+    atoms = []
+    coords = []
+    with open(filename, 'r') as f:
+        for line_no, line in enumerate(f):
+            if line_no < 2:
+                continue
+            parts = line.split()
+            if not parts:
+                continue
+            atoms.append(parts[0])
+            coords.append(np.array([float(parts[1]), float(parts[2]), float(parts[3])]))
+    return atoms, coords
+def check_physics(filename="gen_final.xyz"):
+    """
+    Prints a Ti-O bond length report for a generated structure.
+    Args:
+        filename: XYZ file to validate; defaults to the file written by
+            generate.py.
+    Measures the distance between every Ti and every O atom, prints the
+    minimum and average, and reports success when the minimum lies strictly
+    between 1.5 and 2.5 Å. Returns None.
+    """
+    print("--- 🧪 Scientific Validation ---")
+    # 1. Load the Generated Crystal
+    atoms, coords = read_xyz(filename)
+    # 2. Find the Titanium (Ti) and Oxygens (O) by element symbol
+    ti_indices = [i for i, atom in enumerate(atoms) if atom == "Ti"]
+    o_indices = [i for i, atom in enumerate(atoms) if atom == "O"]
+    if not ti_indices or not o_indices:
+        print("❌ Could not find Ti or O atoms to measure bonds.")
+        return
+    print(f"Found {len(ti_indices)} Titanium and {len(o_indices)} Oxygen atoms.")
+    # 3. Measure every Ti-O distance
+    bond_lengths = [
+        np.linalg.norm(coords[ti_idx] - coords[o_idx])
+        for ti_idx in ti_indices
+        for o_idx in o_indices
+    ]
+    # 4. Analyze Results
+    min_bond = min(bond_lengths)
+    avg_bond = sum(bond_lengths) / len(bond_lengths)
+    print(f"\nMeasured Bond Lengths (Ti - O):")
+    print(f"   Minimum: {min_bond:.4f} Å")
+    print(f"   Average: {avg_bond:.4f} Å")
+    # 5. Pass/Fail
+    # Real Ti-O bonds are typically 1.90 - 2.05 Å; the window is widened
+    # because the model is small and only briefly trained.
+    if 1.5 < min_bond < 2.5:
+        print("\n✅ SUCCESS: The model learned valid chemical bonds!")
+        print("   (Target range: ~1.9 Å)")
+    else:
+        print("\n⚠️  WARNING: Bonds are physically unrealistic.")
+        print("   (Try training for more epochs or checking the dataset)")
+if __name__ == "__main__":
+    check_physics()

visualize.py ADDED Viewed

	@@ -0,0 +1,80 @@

+import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import Axes3D
+import numpy as np
+def read_xyz(filename):
+    """
+    Reads an XYZ file and returns coordinates and atom types.
+    Args:
+        filename: path of the XYZ file.
+    Returns:
+        (coords, atoms): NumPy array with one (x, y, z) row per atom and
+        the parallel list of element symbols.
+    The two header lines (atom count, comment) are skipped. Blank lines,
+    such as a trailing newline at the end of the file, are ignored instead
+    of raising IndexError.
+    """
+    coords = []
+    atoms = []
+    with open(filename, 'r') as f:
+        for line_no, line in enumerate(f):
+            if line_no < 2:
+                continue
+            parts = line.split()
+            if not parts:
+                continue
+            atoms.append(parts[0])
+            coords.append([float(parts[1]), float(parts[2]), float(parts[3])])
+    return np.array(coords), atoms
+def plot_crystal(ax, coords, atoms, title):
+    """
+    Draws one structure on a 3D axes object.
+    Args:
+        ax: 3D matplotlib axes to draw on.
+        coords: (N, 3) array of atom positions.
+        atoms: N element symbols, parallel to coords.
+        title: subplot title.
+    Atoms are drawn as coloured markers, and every pair closer than 2.8 Å is
+    joined by a thin line as a visual "bond".
+    """
+    # Symbols missing from the palette are drawn in blue
+    palette = {'Ti': 'gray', 'O': 'red', 'Ca': 'green', 'Pb': 'black', 'I': 'purple'}
+    for idx, symbol in enumerate(atoms):
+        ax.scatter(coords[idx,0], coords[idx,1], coords[idx,2],
+                  c=palette.get(symbol, 'blue'), s=200, edgecolors='k', alpha=0.8)
+    count = len(coords)
+    for i in range(count):
+        p = coords[i]
+        for j in range(i + 1, count):
+            q = coords[j]
+            if np.linalg.norm(p - q) < 2.8:
+                ax.plot([p[0], q[0]], [p[1], q[1]], [p[2], q[2]],
+                        c='black', linewidth=1, alpha=0.5)
+    ax.set_title(title)
+    ax.set_xlabel('X')
+    ax.set_ylabel('Y')
+    ax.set_zlabel('Z')
+    # Same limits on every axis so side-by-side panels are comparable
+    for set_limits in (ax.set_xlim, ax.set_ylim, ax.set_zlim):
+        set_limits(-2, 5)
+def create_comparison_figure():
+    """
+    Renders the initial noise and the final structure side by side.
+    Reads "gen_step_00.xyz" and "gen_final.xyz" (written by generate.py),
+    saves the figure to "result_plot.png" at 300 dpi and then shows it.
+    """
+    noise_pos, atoms = read_xyz("gen_step_00.xyz")
+    final_pos, _ = read_xyz("gen_final.xyz")
+    fig = plt.figure(figsize=(12, 6))
+    # (subplot slot, positions, caption); both panels share the atom labels
+    # of the first file
+    panels = (
+        (121, noise_pos, "Step 0: Random Noise"),
+        (122, final_pos, "Step 50: Generated Crystal"),
+    )
+    for slot, positions, caption in panels:
+        plot_crystal(fig.add_subplot(slot, projection='3d'), positions, atoms, caption)
+    plt.tight_layout()
+    plt.savefig("result_plot.png", dpi=300)
+    print("Saved comparison figure to 'result_plot.png'")
+    plt.show()
+if __name__ == "__main__":
+    create_comparison_figure()