Skip to content
This repository was archived by the owner on Dec 8, 2024. It is now read-only.

Commit 2b46207

Browse files
author
Silvia
authored
Develop (#98)
* Corrected small bug in predict function * Started updating so that model can be trained after it's been reloaded * Minor modifications * Updated model so one can predict from xyz and disabled shuffling in training because it leads to a problem with predictions * Fix for the problem of shuffling * Added some tests to make sure the predictions work * Fixed a tensorboard problem * The saving of the model doesn't cause an error if the directory already exists * Fixed a bug that made a test fail * Modified the name of a parameter * Made modifications to make the symmetry functions more numerically stable * Added a hack that makes ARMP work with fortran ACSF when there are padded representations. Currently works *ONLY* when there is one molecule for the whole data set. * corrected bug in score function for padded molecules * Changes that make the model work quickly even when there is padding. * Fixed discrepancies between fortran and TF acsf * Corrected bug in setting of ACSF parameters * Attempt at fixing issue #10 * another attempt at fixing #10 * Removed a pointless line * set-up * Added the graceful killer * Modifications which prevent installation from breaking on BC4 * Modification to add neural networks to qmlearn * Fix for issue #8 * Random comment * Started including the atomic model * Made the atomic neural network work * Fixed a bug with the indices * Now training and predictions don't use the default graph, to avoid problems * uncommented examples * Removed unique_elements in data class This can be stored in the NN class, but I might reverse the change later * Made tensorflow an optional dependency The reason for this approach is that pip would just auto install tensorflow and you might want the gpu version or your own compiled one. 
* Made is_numeric non-private and removed legacy code * Added 1d array util function * Removed QML check and moved functions from utils to tf_utils * Support for linear models (no hidden layers) * fixed import bug in tf_utils * Added text to explain that you are scoring on training set * Restructure. But elements are still not working Sorted elements * Moved documentation from init to class * Constant features will now be removed at fit/predict time * Moved get_batch_size back into utils, since it doesn't depend on tf * Made the NeuralNetwork class compliant with sklearn Cannot be any transforms of the input data * Fixed tests that didn't pass * Fixed mistake in checks of set_classes() in ARMP * started fixing ARMP bugs for QM7 * Fixed bug in padding and added examples that give low errors * Attempted fix to make representations single precision * Hot fix for AtomScaler * Minor bug fixes * More bug fixes to make sure tests run * Fixed some tests that had failures * Reverted the fchl tests to original * Fixed path in acsf test * Readded changes to tests * Modifications after code review * Version with the ACSF basis functions starting at 0.8 A * Updated ACSF representations so that the minimum distance at which to start the binning can be set by the user * Modified the name of the new parameter (minimum distance of the binning in ACSF)
1 parent 2c9c7b7 commit 2b46207

7 files changed

Lines changed: 46 additions & 30 deletions

File tree

qml/aglaia/aglaia.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -594,7 +594,7 @@ def _set_acsf_parameters(self, params):
594594
"""
595595

596596
self.acsf_parameters = {'rcut': 5.0, 'acut': 5.0, 'nRs2': 5, 'nRs3': 5, 'nTs': 5,
597-
'zeta': 220.127, 'eta': 30.8065}
597+
'zeta': 220.127, 'eta': 30.8065, 'bin_min': 0.8}
598598

599599
if params is not None:
600600
for key, value in params.items():
@@ -842,6 +842,10 @@ def _check_acsf_values(self):
842842
if is_numeric_array(self.acsf_parameters['zeta']):
843843
raise InputError("Expecting a scalar value for zeta. Got %s." % (self.acsf_parameters['zeta']))
844844

845+
if not is_positive_or_zero(self.acsf_parameters['bin_min']):
846+
raise InputError(
847+
"Expected positive or zero float for variable 'bin_min'. Got %s." % str(self.acsf_parameters['bin_min']))
848+
845849
def _get_msize(self, pad = 0):
846850
"""
847851
Gets the maximum number of atoms in a single molecule. To support larger molecules
@@ -1695,7 +1699,8 @@ def _generate_acsf_tf(self, xyz, classes):
16951699
nRs2=self.acsf_parameters['nRs2'],
16961700
nRs3=self.acsf_parameters['nRs3'],
16971701
nTs=self.acsf_parameters['nTs'], eta=self.acsf_parameters['eta'],
1698-
zeta=self.acsf_parameters['zeta'])
1702+
zeta=self.acsf_parameters['zeta'],
1703+
bin_min=self.acsf_parameters['bin_min'])
16991704

17001705
sess = tf.Session()
17011706
sess.run(tf.global_variables_initializer())
@@ -1765,7 +1770,8 @@ def _generate_acsf_fortran(self, xyz, classes):
17651770
nTs=self.acsf_parameters['nTs'],
17661771
eta2=self.acsf_parameters['eta'],
17671772
eta3=self.acsf_parameters['eta'],
1768-
zeta=self.acsf_parameters['zeta'])
1773+
zeta=self.acsf_parameters['zeta'],
1774+
bin_min=self.acsf_parameters['bin_min'])
17691775

17701776
padded_g = np.zeros((initial_natoms, g.shape[-1]))
17711777
padded_g[:g.shape[0], :] = g
@@ -2121,7 +2127,7 @@ def _check_representation_parameters(self, parameters):
21212127
elif self.representation_name == "acsf":
21222128

21232129
acsf_parameters = {'rcut': 5.0, 'acut': 5.0, 'nRs2': 5, 'nRs3': 5, 'nTs': 5,
2124-
'zeta': 220.127, 'eta': 30.8065}
2130+
'zeta': 220.127, 'eta': 30.8065, 'bin_min':0.8}
21252131

21262132
for key, value in parameters.items():
21272133
try:
@@ -2423,7 +2429,8 @@ def _build_model_from_xyz(self, n_atoms, element_weights, element_biases):
24232429
nRs3=self.acsf_parameters['nRs3'],
24242430
nTs=self.acsf_parameters['nTs'],
24252431
eta=self.acsf_parameters['eta'],
2426-
zeta=self.acsf_parameters['zeta'])
2432+
zeta=self.acsf_parameters['zeta'],
2433+
bin_min=self.acsf_parameters['bin_min'])
24272434

24282435
with tf.name_scope("Model_pred"):
24292436
batch_energies_nn = self._model(batch_representation, batch_zs, element_weights, element_biases)

qml/aglaia/np_symm_funct.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ def acsf_ang(xyzs, Zs, element_pairs, angular_cutoff, angular_rs, theta_s, zeta,
188188
return np.asarray(total_descriptor)
189189

190190
def generate_acsf_np(xyzs, Zs, elements, element_pairs, rcut, acut, nRs2,
191-
nRs3, nTs, zeta, eta):
191+
nRs3, nTs, zeta, eta, bin_min):
192192
"""
193193
This function calculates the symmetry functions used in the tensormol paper.
194194
@@ -203,11 +203,12 @@ def generate_acsf_np(xyzs, Zs, elements, element_pairs, rcut, acut, nRs2,
203203
:param theta_s: list of all the thetas parameters. Numpy array of shape (n_thetas,)
204204
:param zeta: parameter. scalar.
205205
:param eta: parameter. scalar.
206+
:param bin_min: value at which to start the binning of the distances
206207
:return: numpy array of shape (n_samples, n_atoms, n_rad_rs*n_elements + n_ang_rs*n_thetas*n_element_pairs)
207208
"""
208209

209-
radial_rs = np.linspace(0, rcut, nRs2)
210-
angular_rs = np.linspace(0, acut, nRs3)
210+
radial_rs = np.linspace(bin_min, rcut, nRs2)
211+
angular_rs = np.linspace(bin_min, acut, nRs3)
211212
theta_s = np.linspace(0, np.pi, nTs)
212213

213214
rad_term = acsf_rad(xyzs, Zs, elements, rcut, radial_rs, eta)

qml/aglaia/symm_funct.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,7 @@ def sum_ang(pre_sumterm, Zs, element_pairs_list, angular_rs, theta_s):
381381
return clean_final_term
382382

383383
def generate_acsf_tf(xyzs, Zs, elements, element_pairs, rcut, acut,
384-
nRs2, nRs3, nTs, zeta, eta):
384+
nRs2, nRs3, nTs, zeta, eta, bin_min):
385385
"""
386386
This function generates the atom centred symmetry function as used in the Tensormol paper. Currently only tested for
387387
single systems with many conformations. It requires the coordinates of all the atoms in each data sample, the atomic
@@ -410,13 +410,15 @@ def generate_acsf_tf(xyzs, Zs, elements, element_pairs, rcut, acut,
410410
:type zeta: scalar float
411411
:param eta: parameter in the exponential terms
412412
:type eta: scalar float
413+
:param bin_min: the value at which to start binning the distances
414+
:type bin_min: non-negative float
413415
414416
:return: the atom centred symmetry functions
415417
:rtype: a tf tensor of shape (n_samples, n_atoms, nRs2 * n_elements + nRs3 * nTs * n_elementpairs)
416418
"""
417419

418-
radial_rs = np.linspace(0, rcut, nRs2)
419-
angular_rs = np.linspace(0, acut, nRs3)
420+
radial_rs = np.linspace(bin_min, rcut, nRs2)
421+
angular_rs = np.linspace(bin_min, acut, nRs3)
420422
theta_s = np.linspace(0, np.pi, nTs)
421423

422424
with tf.name_scope("acsf_params"):

qml/data/compound.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,7 @@ def generate_slatm(self, mbtypes,
295295
self.representation = slatm
296296

297297
def generate_acsf(self, elements = [1,6,7,8,16], nRs2 = 3, nRs3 = 3, nTs = 3, eta2 = 1,
298-
eta3 = 1, zeta = 1, rcut = 5, acut = 5, gradients = False):
298+
eta3 = 1, zeta = 1, rcut = 5, acut = 5, bin_min=0.8, gradients = False):
299299
"""
300300
Generate the variant of atom-centered symmetry functions used in https://doi.org/10.1039/C7SC04934J
301301
@@ -317,12 +317,14 @@ def generate_acsf(self, elements = [1,6,7,8,16], nRs2 = 3, nRs3 = 3, nTs = 3, et
317317
:type rcut: float
318318
:param acut: Cut-off radius of the three-body terms
319319
:type acut: float
320+
:param bin_min: the value at which to start binning the distances
321+
:type bin_min: non-negative float
320322
:param gradients: To return gradients or not
321323
:type gradients: boolean
322324
"""
323325

324-
Rs2 = np.linspace(0, rcut, nRs2)
325-
Rs3 = np.linspace(0, acut, nRs3)
326+
Rs2 = np.linspace(bin_min, rcut, nRs2)
327+
Rs3 = np.linspace(bin_min, acut, nRs3)
326328
Ts = np.linspace(0, np.pi, nTs)
327329
n_elements = len(elements)
328330
natoms = len(self.coordinates)

qml/representations/representations.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -549,8 +549,8 @@ def generate_slatm(coordinates, nuclear_charges, mbtypes,
549549

550550
return mbs
551551

552-
def generate_acsf(nuclear_charges, coordinates, elements = [1,6,7,8,16], nRs2 = 3, nRs3 = 3, nTs = 3, eta2 = 1,
553-
eta3 = 1, zeta = 1, rcut = 5, acut = 5, gradients = False):
552+
def generate_acsf(nuclear_charges, coordinates, elements = [1,6,7,8,16], nRs2 = 3, nRs3 = 3, nTs = 3, eta2 = 1,
553+
eta3 = 1, zeta = 1, rcut = 5, acut = 5, bin_min=0.8, gradients = False):
554554
"""
555555
Generate the variant of atom-centered symmetry functions used in https://doi.org/10.1039/C7SC04934J
556556
@@ -576,14 +576,16 @@ def generate_acsf(nuclear_charges, coordinates, elements = [1,6,7,8,16], nRs2 =
576576
:type rcut: float
577577
:param acut: Cut-off radius of the three-body terms
578578
:type acut: float
579+
:param bin_min: the value at which to start binning the distances
580+
:type bin_min: non-negative float
579581
:param gradients: To return gradients or not
580582
:type gradients: boolean
581583
:return: Atom-centered symmetry functions representation
582584
:rtype: numpy array
583585
"""
584586

585-
Rs2 = np.linspace(0, rcut, nRs2)
586-
Rs3 = np.linspace(0, acut, nRs3)
587+
Rs2 = np.linspace(bin_min, rcut, nRs2)
588+
Rs3 = np.linspace(bin_min, acut, nRs3)
587589
Ts = np.linspace(0, np.pi, nTs)
588590
n_elements = len(elements)
589591
natoms = len(coordinates)

test/test_acsf.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ def test_acsf_1():
4949
acut = 5
5050
zeta = 220.127
5151
eta = 30.8065
52+
bin_min = 0.0
5253

5354
input_data = test_dir + "/data/data_test_acsf.npz"
5455
data = np.load(input_data)
@@ -65,13 +66,13 @@ def test_acsf_1():
6566
zs_tf = tf.placeholder(shape=[n_samples, n_atoms], dtype=tf.int32, name="zs")
6667
xyz_tf = tf.placeholder(shape=[n_samples, n_atoms, 3], dtype=tf.float32, name="xyz")
6768

68-
acsf_tf_t = symm_funct.generate_acsf_tf(xyz_tf, zs_tf, elements, element_pairs, rcut, acut, nRs2, nRs3, nTs, zeta, eta)
69+
acsf_tf_t = symm_funct.generate_acsf_tf(xyz_tf, zs_tf, elements, element_pairs, rcut, acut, nRs2, nRs3, nTs, zeta, eta, bin_min)
6970

7071
sess = tf.Session()
7172
sess.run(tf.global_variables_initializer())
7273
acsf_tf = sess.run(acsf_tf_t, feed_dict={xyz_tf: xyzs, zs_tf: zs})
7374

74-
acsf_np = np_symm_funct.generate_acsf_np(xyzs, zs, elements, element_pairs, rcut, acut, nRs2, nRs3, nTs, zeta, eta)
75+
acsf_np = np_symm_funct.generate_acsf_np(xyzs, zs, elements, element_pairs, rcut, acut, nRs2, nRs3, nTs, zeta, eta, bin_min)
7576

7677
n_samples = xyzs.shape[0]
7778
n_atoms = xyzs.shape[1]
@@ -97,6 +98,7 @@ def test_acsf_2():
9798
acut = 5
9899
zeta = 220.127
99100
eta = 30.8065
101+
bin_min = 0.0
100102

101103
input_data = test_dir + "/data/qm7_testdata.npz"
102104
data = np.load(input_data)
@@ -113,13 +115,13 @@ def test_acsf_2():
113115
zs_tf = tf.placeholder(shape=[n_samples, max_n_atoms], dtype=tf.int32, name="zs")
114116
xyz_tf = tf.placeholder(shape=[n_samples, max_n_atoms, 3], dtype=tf.float32, name="xyz")
115117

116-
acsf_tf_t = symm_funct.generate_acsf_tf(xyz_tf, zs_tf, elements, element_pairs, rcut, acut, nRs2, nRs3, nTs, zeta, eta)
118+
acsf_tf_t = symm_funct.generate_acsf_tf(xyz_tf, zs_tf, elements, element_pairs, rcut, acut, nRs2, nRs3, nTs, zeta, eta, bin_min)
117119

118120
sess = tf.Session()
119121
sess.run(tf.global_variables_initializer())
120122
acsf_tf = sess.run(acsf_tf_t, feed_dict={xyz_tf: xyzs, zs_tf: zs})
121123

122-
acsf_np = np_symm_funct.generate_acsf_np(xyzs, zs, elements, element_pairs, rcut, acut, nRs2, nRs3, nTs, zeta, eta)
124+
acsf_np = np_symm_funct.generate_acsf_np(xyzs, zs, elements, element_pairs, rcut, acut, nRs2, nRs3, nTs, zeta, eta, bin_min)
123125

124126
for i in range(n_samples):
125127
for j in range(max_n_atoms):
@@ -130,7 +132,6 @@ def test_acsf_2():
130132
acsf_tf_sort = np.sort(acsf_tf[i][j])
131133
np.testing.assert_array_almost_equal(acsf_np_sort, acsf_tf_sort, decimal=4)
132134

133-
134135
if __name__ == "__main__":
135136
test_acsf_1()
136137
test_acsf_2()

test/test_symm_funct.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def test_acsf():
6363
"qm7/0110.xyz"]
6464

6565

66-
path = test_dir = os.path.dirname(os.path.realpath(__file__))
66+
path = os.path.dirname(os.path.realpath(__file__))
6767

6868
mols = []
6969
for xyz_file in files:
@@ -85,7 +85,7 @@ def fort_acsf(mols, path, elements):
8585
# Generate atom centered symmetry functions representation
8686
# using the Compound class
8787
for i, mol in enumerate(mols):
88-
mol.generate_acsf(elements = elements)
88+
mol.generate_acsf(elements = elements, bin_min=0.0)
8989

9090
X_test = np.concatenate([mol.representation for mol in mols])
9191
X_ref = np.loadtxt(path + "/data/acsf_representation.txt")
@@ -96,8 +96,8 @@ def fort_acsf(mols, path, elements):
9696
rep = []
9797
for i, mol in enumerate(mols):
9898
rep.append(generate_acsf(coordinates = mol.coordinates,
99-
nuclear_charges = mol.nuclear_charges,
100-
elements = elements))
99+
nuclear_charges = mol.nuclear_charges,
100+
elements = elements, bin_min=0.0))
101101

102102
X_test = np.concatenate(rep)
103103
X_ref = np.loadtxt(path + "/data/acsf_representation.txt")
@@ -111,6 +111,7 @@ def tf_acsf(mols, path, elements):
111111
n_theta_s = 3
112112
zeta = 1.0
113113
eta = 1.0
114+
bin_min=0.0
114115

115116
element_pairs = []
116117
for i, ei in enumerate(elements):
@@ -128,7 +129,7 @@ def tf_acsf(mols, path, elements):
128129
zs_tf = tf.placeholder(shape=[n_samples, max_n_atoms], dtype=tf.int32, name="zs")
129130
xyz_tf = tf.placeholder(shape=[n_samples, max_n_atoms, 3], dtype=tf.float32, name="xyz")
130131

131-
acsf_tf_t = symm_funct.generate_acsf_tf(xyz_tf, zs_tf, elements, element_pairs, radial_cutoff, angular_cutoff, n_radial_rs, n_angular_rs, n_theta_s, zeta, eta)
132+
acsf_tf_t = symm_funct.generate_acsf_tf(xyz_tf, zs_tf, elements, element_pairs, radial_cutoff, angular_cutoff, n_radial_rs, n_angular_rs, n_theta_s, zeta, eta, bin_min)
132133

133134
sess = tf.Session()
134135
sess.run(tf.global_variables_initializer())
@@ -142,7 +143,7 @@ def fort_acsf_gradients(mols, path, elements):
142143
# Generate atom centered symmetry functions representation
143144
# and gradients using the Compound class
144145
for i, mol in enumerate(mols):
145-
mol.generate_acsf(elements = elements, gradients = True)
146+
mol.generate_acsf(elements = elements, gradients = True, bin_min=0.0)
146147

147148
X_test = np.concatenate([mol.representation for mol in mols])
148149
X_ref = np.loadtxt(path + "/data/acsf_representation.txt")
@@ -159,7 +160,7 @@ def fort_acsf_gradients(mols, path, elements):
159160
grad = []
160161
for i, mol in enumerate(mols):
161162
r, g = generate_acsf(coordinates = mol.coordinates, nuclear_charges = mol.nuclear_charges,
162-
elements = elements, gradients = True)
163+
elements = elements, gradients = True, bin_min=0.0)
163164
rep.append(r)
164165
grad.append(g)
165166

0 commit comments

Comments
 (0)