import json
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import MessagePassing, global_add_pool, global_max_pool

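# GraphCLIP wraps a GIN-based molecule encoder (GNNEncoder) and a projection head
# (ProjectionHead) and returns L2-normalized graph embeddings. The name and the
# normalization suggest it serves as the molecule branch of a CLIP-style contrastive
# setup; the paired text/other-modality encoder is not defined in this file.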
class GraphCLIP(nn.Module):
    def __init__(
        self,
        graph_num_layer,
        graph_hidden_size,
        dropout,
        model_config,
    ):
        super().__init__()
        self.model_config = model_config
        self.hidden_size = graph_hidden_size
        self.molecule_encoder = GNNEncoder(
            num_layer=graph_num_layer, hidden_size=graph_hidden_size, drop_ratio=dropout
        )
        self.molecule_projection = ProjectionHead(
            embedding_dim=graph_hidden_size, projection_dim=graph_hidden_size, dropout=dropout
        )

    def forward(self, x, edge_index, edge_attr, batch):
        # Encode the batched graphs, project, and L2-normalize so that cosine
        # similarity between molecules reduces to a dot product.
        molecule_features = self.molecule_encoder(x, edge_index, edge_attr, batch)
        molecule_embeddings = self.molecule_projection(molecule_features)
        molecule_embeddings = molecule_embeddings / molecule_embeddings.norm(dim=-1, keepdim=True)
        return molecule_embeddings

    def save_pretrained(self, output_dir):
        """
        Save the molecule encoder, projection head, and model_config to the output directory.
        """
        os.makedirs(output_dir, exist_ok=True)

        molecule_path = os.path.join(output_dir, 'model.pt')
        # Build the projection path explicitly; deriving it via str.replace('model', ...)
        # would also rewrite any 'model' substring appearing in output_dir.
        proj_path = os.path.join(output_dir, 'model_proj.pt')
        config_path = os.path.join(output_dir, 'model_config.json')

        torch.save(self.molecule_encoder.state_dict(), molecule_path)
        torch.save(self.molecule_projection.state_dict(), proj_path)

        with open(config_path, 'w') as f:
            json.dump(self.model_config, f, indent=2)

    def disable_grads(self):
        """
        Disable gradients for all parameters in the model.
        """
        for param in self.parameters():
            param.requires_grad = False

    def init_model(self, model_path, verbose=True):
        """
        Load encoder and projection weights from a directory written by save_pretrained.
        """
        molecule_path = os.path.join(model_path, 'model.pt')
        proj_path = os.path.join(model_path, 'model_proj.pt')

        if os.path.exists(molecule_path):
            self.molecule_encoder.load_state_dict(torch.load(molecule_path, map_location='cpu', weights_only=False))
        else:
            raise FileNotFoundError(f"Molecule encoder file not found: {molecule_path}")

        if os.path.exists(proj_path):
            self.molecule_projection.load_state_dict(torch.load(proj_path, map_location='cpu', weights_only=False))
        else:
            raise FileNotFoundError(f"Molecule projection file not found: {proj_path}")

        if verbose:
            print('GraphCLIP models initialized.')
            print('Molecule model:\n', self.molecule_encoder)
            print('Molecule projection:\n', self.molecule_projection)

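# GNNEncoder: GIN-style message passing over molecular graphs with a per-graph
# virtual node. Each layer adds the virtual-node state to the node features,
# applies an edge-conditioned GINConv followed by LayerNorm (plus GELU and dropout
# on all but the last layer) and a residual connection; between layers, the virtual
# node is refreshed from a max-pool of the node states. The graph embedding is the
# sum-pool of the final node states.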
class GNNEncoder(nn.Module):
    def __init__(self, num_layer, hidden_size, drop_ratio):
        super(GNNEncoder, self).__init__()

        self.num_layer = num_layer
        self.drop_ratio = drop_ratio
        if self.num_layer < 2:
            raise ValueError("Number of GNN layers must be greater than 1.")

        # Embedding table for categorical atom types.
        self.atom_encoder = nn.Embedding(118, hidden_size)

        # Single learnable virtual-node embedding, initialized to zero.
        self.virtualnode_embedding = nn.Embedding(1, hidden_size)
        nn.init.constant_(self.virtualnode_embedding.weight.data, 0)

        self.convs = nn.ModuleList()
        self.norms = nn.ModuleList()
        self.mlp_virtualnode_list = nn.ModuleList()

        for layer in range(num_layer):
            self.convs.append(GINConv(hidden_size, drop_ratio))
            self.norms.append(nn.LayerNorm(hidden_size, elementwise_affine=True))
            if layer < num_layer - 1:
                # MLP that refreshes the virtual-node state between layers.
                self.mlp_virtualnode_list.append(nn.Sequential(
                    nn.Linear(hidden_size, 4 * hidden_size),
                    nn.LayerNorm(4 * hidden_size),
                    nn.GELU(),
                    nn.Dropout(drop_ratio),
                    nn.Linear(4 * hidden_size, hidden_size),
                ))

    def initialize_weights(self):
        """
        Xavier-initialize all linear layers. Not called in __init__; invoke explicitly if desired.
        """
        def _basic_init(module):
            if isinstance(module, nn.Linear):
                torch.nn.init.xavier_uniform_(module.weight)
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)

        self.apply(_basic_init)

    def forward(self, x, edge_index, edge_attr, batch):
        # One zero-initialized virtual-node state per graph in the batch.
        num_graphs = batch[-1].item() + 1
        virtualnode_embedding = self.virtualnode_embedding(
            torch.zeros(num_graphs, dtype=edge_index.dtype, device=edge_index.device)
        )

        h_list = [self.atom_encoder(x)]

        for layer in range(self.num_layer):
            # Broadcast each graph's virtual-node state onto its nodes.
            h_list[layer] = h_list[layer] + virtualnode_embedding[batch]

            h = self.convs[layer](h_list[layer], edge_index, edge_attr)
            h = self.norms[layer](h)

            if layer < self.num_layer - 1:
                h = F.gelu(h)
                h = F.dropout(h, self.drop_ratio, training=self.training)

            # Residual connection.
            h = h + h_list[layer]
            h_list.append(h)

            # Refresh the virtual node from a max-pool of this layer's input node states.
            if layer < self.num_layer - 1:
                virtual_pool = global_max_pool(h_list[layer], batch)
                virtualnode_embedding = virtualnode_embedding + F.dropout(
                    self.mlp_virtualnode_list[layer](virtual_pool), self.drop_ratio, training=self.training
                )

        h_node = h_list[-1]
        h_graph = global_add_pool(h_node, batch)

        return h_graph

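# GINConv implements the edge-conditioned GIN update
#   h_v' = MLP((1 + eps) * h_v + sum_{u in N(v)} GELU(h_u + e_uv)),
# where e_uv is the learned embedding of the categorical bond type on edge (u, v).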
class GINConv(MessagePassing):
    def __init__(self, hidden_size, drop_ratio):
        '''
        hidden_size (int): width of the node/edge embeddings and of the update MLP.
        drop_ratio (float): dropout probability inside the update MLP.
        '''
        super(GINConv, self).__init__(aggr="add")

        self.mlp = nn.Sequential(
            nn.Linear(hidden_size, 4 * hidden_size),
            nn.LayerNorm(4 * hidden_size),
            nn.GELU(),
            nn.Dropout(drop_ratio),
            nn.Linear(4 * hidden_size, hidden_size),
        )
        self.eps = torch.nn.Parameter(torch.Tensor([0]))
        self.bond_encoder = nn.Embedding(5, hidden_size)

    def forward(self, x, edge_index, edge_attr):
        edge_embedding = self.bond_encoder(edge_attr)
        out = self.mlp((1 + self.eps) * x + self.propagate(edge_index, x=x, edge_attr=edge_embedding))
        return out

    def message(self, x_j, edge_attr):
        return F.gelu(x_j + edge_attr)

    def update(self, aggr_out):
        return aggr_out

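# ProjectionHead: a two-layer MLP (Linear -> LayerNorm -> GELU -> Dropout -> Linear)
# that maps encoder features into the embedding space used by GraphCLIP.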
class ProjectionHead(nn.Module):
    def __init__(
        self,
        embedding_dim,
        projection_dim,
        dropout,
        act_layer=nn.GELU,
        hidden_features=None,
        bias=True,
    ):
        super().__init__()
        projection_dim = projection_dim or embedding_dim
        hidden_features = hidden_features or embedding_dim
        linear_layer = nn.Linear

        self.fc1 = linear_layer(embedding_dim, hidden_features, bias=bias)
        self.norm1 = nn.LayerNorm(hidden_features)
        self.act = act_layer()
        self.drop1 = nn.Dropout(dropout)
        self.fc2 = linear_layer(hidden_features, projection_dim, bias=bias)

    def forward(self, x):
        x = self.fc1(x)
        x = self.norm1(x)
        x = self.act(x)
        x = self.drop1(x)
        x = self.fc2(x)
        return x
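

if __name__ == "__main__":
    # Minimal smoke-test sketch (not part of the original module). The layer count,
    # hidden size, dropout, and the two toy graphs below are illustrative assumptions;
    # real inputs would come from a featurized molecule dataset batched by PyG.
    config = {"graph_num_layer": 3, "graph_hidden_size": 64, "dropout": 0.1}
    model = GraphCLIP(
        graph_num_layer=config["graph_num_layer"],
        graph_hidden_size=config["graph_hidden_size"],
        dropout=config["dropout"],
        model_config=config,
    )
    model.eval()

    # Two tiny graphs batched together: x holds atom-type indices (< 118),
    # edge_attr holds bond-type indices (< 5) to match the embedding tables.
    x = torch.tensor([0, 1, 2, 3])
    edge_index = torch.tensor([[0, 1, 2, 3],
                               [1, 0, 3, 2]])
    edge_attr = torch.tensor([0, 0, 1, 1])
    batch = torch.tensor([0, 0, 1, 1])  # nodes 0-1 -> graph 0, nodes 2-3 -> graph 1

    with torch.no_grad():
        emb = model(x, edge_index, edge_attr, batch)
    print(emb.shape)  # expected: torch.Size([2, 64]), rows are unit-norm embeddings

    # Round-trip the checkpoint helpers through a temporary directory.
    import tempfile
    with tempfile.TemporaryDirectory() as tmp:
        model.save_pretrained(tmp)
        model.init_model(tmp, verbose=False)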