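# NOTE(review): the next three lines look like web-page residue from the file
# upload, not module content; kept as comments so the file parses -- confirm.
# bndl's picture
# Upload 115 files
# 4f5540c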
import os, pickle
import qml
import numpy as np
import pandas as pd
from tqdm import tqdm
from polymerlearn.utils.comparison_rep.element_info import *
import ase
from dscribe.descriptors import SOAP
from dscribe.descriptors import MBTR as MBTR_
import pyarrow.feather as feather
def Atom_XYZ(xyz_list):
    '''
    Parse the atom lines of one XYZ chunk.

    xyz_list: iterable of strings, one per atom, laid out as
        "<symbol> <x> <y> <z>". Fields may be separated by any run of
        spaces and/or tabs; leading/trailing whitespace and a trailing
        newline are optional.

    Returns (atoms, charges, coords):
        atoms:   list of the first field of every line (element symbol)
        charges: list of AtomicNumber(symbol), in the same order
        coords:  float ndarray of shape (n_atoms, 3); (0, 3) when empty

    Blank / whitespace-only lines are skipped. A line with fewer than four
    fields raises IndexError; a non-numeric coordinate raises ValueError.
    '''
    atoms = []
    charges = []
    rows = []
    for line in xyz_list:
        # split() with no separator collapses every run of spaces/tabs and
        # ignores the line ending, so no manual whitespace clean-up (and no
        # blind removal of the last character) is needed.
        fields = line.split()
        if not fields:
            continue
        atoms.append(fields[0])
        # Convert here so coords stays numeric instead of becoming an array
        # of strings.
        rows.append([float(fields[1]), float(fields[2]), float(fields[3])])
        charges.append(AtomicNumber(fields[0]))
    # Build the array once; reshape keeps the (0, 3) shape for empty input.
    coords = np.array(rows, dtype=float).reshape(-1, 3)
    return atoms, charges, coords
def CM(new_struct):
    '''
    Coulomb-matrix representation of one structure.

    new_struct: one chunk of XYZ file (atom lines only), parsed by Atom_XYZ

    Returns whatever qml.representations.generate_coulomb_matrix produces
    when sized to this molecule's atom count with 'row-norm' sorting.
    '''
    symbols, nuclear_charges, positions = Atom_XYZ(new_struct)
    # The matrix is sized to exactly this molecule (no padding).
    n_atoms = len(symbols)
    return qml.representations.generate_coulomb_matrix(
        nuclear_charges=nuclear_charges,
        coordinates=positions,
        size=n_atoms,
        sorting='row-norm',
    )
def BOB(new_struct):
    '''
    Bag-of-bonds representation of one structure.

    new_struct: one chunk of XYZ file (atom lines only), parsed by Atom_XYZ

    Returns whatever qml.representations.generate_bob produces when sized
    to this molecule's atom count and per-element atom counts.
    '''
    symbols, nuclear_charges, positions = Atom_XYZ(new_struct)

    # Per-element atom counts, keyed by symbol in order of first appearance.
    counts = {}
    for symbol in symbols:
        counts[symbol] = counts.get(symbol, 0) + 1

    unique_symbols = np.unique(np.asarray(symbols))
    return qml.representations.generate_bob(
        nuclear_charges=nuclear_charges,
        coordinates=positions,
        atomtypes=unique_symbols,
        size=len(symbols),
        asize=counts,
    )
def mySOAP(new_struct):
    '''
    SOAP representation of one structure, flattened to 1-D.

    new_struct: one chunk of XYZ file (atom lines only), parsed by Atom_XYZ

    The descriptor is non-periodic, uses the unique element symbols of this
    structure as species, and is evaluated at the atom coordinates (they are
    passed as `positions`). The result of `create` is flattened on return.
    '''
    symbols, nuclear_charges, xyz = Atom_XYZ(new_struct)
    element_set = np.unique(np.asarray(symbols))

    descriptor = SOAP(
        species=element_set,
        periodic=False,
        rcut=3.0,
        nmax=5,
        lmax=4,
    )
    system = ase.Atoms(positions=xyz, numbers=nuclear_charges)
    features = descriptor.create(system=system, positions=xyz, n_jobs=1)
    return features.flatten()
def MBTR(new_struct):
    '''
    MBTR representation of one structure.

    new_struct: one chunk of XYZ file (atom lines only), parsed by Atom_XYZ

    Configures k1 (atomic number), k2 (inverse distance) and k3 (cosine)
    terms, each on a 100-point grid with sigma 0.1, non-periodic, with
    "l2_each" normalization and flattened output. Returns the result of
    `create` unchanged.
    '''
    symbols, nuclear_charges, xyz = Atom_XYZ(new_struct)
    element_set = np.unique(np.asarray(symbols))

    # k2 and k3 share the same exponential weighting settings; each term
    # gets its own dict object.
    k1_term = {
        "geometry": {"function": "atomic_number"},
        "grid": {"min": 0, "max": 8, "n": 100, "sigma": 0.1},
    }
    k2_term = {
        "geometry": {"function": "inverse_distance"},
        "grid": {"min": 0, "max": 1, "n": 100, "sigma": 0.1},
        "weighting": {"function": "exponential", "scale": 0.5, "cutoff": 1e-3},
    }
    k3_term = {
        "geometry": {"function": "cosine"},
        "grid": {"min": -1, "max": 1, "n": 100, "sigma": 0.1},
        "weighting": {"function": "exponential", "scale": 0.5, "cutoff": 1e-3},
    }

    descriptor = MBTR_(
        species=element_set,
        k1=k1_term,
        k2=k2_term,
        k3=k3_term,
        periodic=False,
        normalization="l2_each",
        flatten=True,
    )
    system = ase.Atoms(positions=xyz, numbers=nuclear_charges)
    # The job count is fixed at 1 here.
    return descriptor.create(system=system, n_jobs=1)
def get_one_top_xyz(filename):
    '''
    Gets the atom lines of the first (top) conformer in an XYZ file, ready
    to input to a representation generator.

    filename: path to an XYZ file whose first frame is laid out as
        line 0:              number of atoms
        line 1:              comment line
        lines 2..natoms+1:   one line per atom

    Returns a list of `natoms` strings (line endings preserved).

    Raises ValueError if the first line is not an integer or the file holds
    fewer than `natoms` atom lines after the two header lines.
    '''
    with open(filename) as f:
        lines = f.readlines()
    natoms = int(lines[0])
    # Atom lines start after the count and comment lines, so the slice must
    # end at natoms + 2; ending at natoms + 1 would drop the last atom.
    file_chunk = lines[2:natoms + 2]
    if len(file_chunk) != natoms:
        raise ValueError(
            'Expected {} atom lines in {}, found {}'.format(
                natoms, filename, len(file_chunk)))
    return file_chunk
def screen_build(all_AG,
        xyz_loc = '../../../Structures/AG/xyz',
        rep_dir_loc = '../../../Representations',
        reps_to_screen = ('MBTR',)):
    '''
    Screens all acids/glycols in a list of names and builds representations
    of the given type(s), pickling one file per (representation, name).

    all_AG: iterable of acid/glycol names; `<name>.xyz` is read from xyz_loc
    xyz_loc: directory holding the XYZ files
    rep_dir_loc: root output directory; results are written to
        `<rep_dir_loc>/<rep>/AG/<name lowercased>.pickle`
    reps_to_screen: iterable of representation keys; each must be one of
        'CM', 'SOAP', 'BOB', 'MBTR'

    Existing pickle files are left untouched (that name is skipped). The
    output directory is created if it is missing.

    Raises KeyError for an unknown representation key.
    '''
    gen_dict = {
        'CM': CM,
        'SOAP': mySOAP,
        'BOB': BOB,
        'MBTR': MBTR
    }
    for rep in reps_to_screen:
        print('REP', rep)
        build_rep = gen_dict[rep]
        # The output directory depends only on the representation, so it is
        # computed (and created) once per representation.
        rloc_rep = os.path.join(rep_dir_loc, rep, 'AG')
        os.makedirs(rloc_rep, exist_ok=True)
        for ag in all_AG:
            pickle_path = os.path.join(rloc_rep, ag.lower() + '.pickle')
            if os.path.exists(pickle_path):
                continue
            fchunk = get_one_top_xyz(os.path.join(xyz_loc, ag + '.xyz'))
            mol = build_rep(fchunk)
            # Context manager so the file is flushed and closed
            # deterministically instead of leaking the handle.
            with open(pickle_path, 'wb') as fh:
                pickle.dump(mol, fh)
if __name__ == '__main__':
    # Script entry point: read the dataset, take the acid and glycol names
    # from fixed column ranges, and build the default representations.
    data = pd.read_csv('../../../dataset/pub_data.csv')

    # Half-open column index ranges of the acid and glycol columns.
    acid_span = (20, 33)
    glycol_span = (34, 46)

    # Each name is the column header minus its first character; acid
    # columns containing '95% trans' are left out.
    acid_names = []
    for column in data.columns[acid_span[0]:acid_span[1]].tolist():
        if '95% trans' in column:
            continue
        acid_names.append(column[1:])

    glycol_names = []
    for column in data.columns[glycol_span[0]:glycol_span[1]].tolist():
        glycol_names.append(column[1:])

    full_names = acid_names + glycol_names

    # NOTE(review): xyz_loc is assigned but never passed on, so screen_build
    # runs with its own default XYZ directory -- confirm which was intended.
    xyz_loc = '../../../Structures/AG'
    screen_build(full_names)