-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added drop_columns feature; usefull for optimizing cutoffs
- Loading branch information
Showing
3 changed files
with
329 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
import pytest | ||
import numpy as np | ||
import ase | ||
from uf3.data import composition | ||
from uf3.representation.process import * | ||
from uf3.regression import optimize | ||
|
||
@pytest.fixture() | ||
def Nb_Sn_chemistry(): | ||
element_list = ['Nb', 'Sn'] | ||
chemistry_config = composition.ChemicalSystem(element_list,degree=3) | ||
yield chemistry_config | ||
|
||
@pytest.fixture() | ||
def bspline_config_larger_cutoff(Nb_Sn_chemistry): | ||
element_list = ['Nb', 'Sn'] | ||
chemistry_config = composition.ChemicalSystem(element_list,degree=3) | ||
bspline_config = optimize.get_bspline_config(Nb_Sn_chemistry, rmin=0, | ||
rmax_2b=6, rmax_3b=4, | ||
knot_spacing=0.4) | ||
yield bspline_config | ||
|
||
|
||
@pytest.fixture() | ||
def bspline_config_smaller_cutoff(Nb_Sn_chemistry): | ||
element_list = ['Nb', 'Sn'] | ||
chemistry_config = composition.ChemicalSystem(element_list,degree=3) | ||
bspline_config = optimize.get_bspline_config(Nb_Sn_chemistry, rmin=0, | ||
rmax_2b=4.4, rmax_3b=3.2, | ||
knot_spacing=0.4) | ||
yield bspline_config | ||
|
||
@pytest.fixture() | ||
def Nb3Sn_geom(): | ||
geom = ase.Atoms('Nb6Sn2', | ||
positions = [[0.000000000, 2.667305470, 1.333652735], | ||
[0.000000000, 2.667305470, 4.000958204], | ||
[1.333652735, 0.000000000, 2.667305470], | ||
[4.000958204, 0.000000000, 2.667305470], | ||
[2.667305470, 1.333652735, 0.000000000], | ||
[2.667305470, 4.000958204, 0.000000000], | ||
[0.000000000, 0.000000000, 0.000000000], | ||
[2.667305470, 2.667305470, 2.667305470]], | ||
pbc = True, | ||
cell = [[5.3346109390, 0.0000000000, 0.0000000000], | ||
[0.0000000000, 5.3346109390, 0.0000000000], | ||
[0.0000000000, 0.0000000000, 5.3346109390]]) | ||
|
||
yield geom | ||
|
||
|
||
class TestOptimize: | ||
|
||
def test_get_bspline_config(self,bspline_config_larger_cutoff): | ||
|
||
for i in bspline_config_larger_cutoff.interactions_map[2]: | ||
assert bspline_config_larger_cutoff.r_min_map[i] == 0 | ||
assert bspline_config_larger_cutoff.r_max_map[i] == 6 | ||
assert bspline_config_larger_cutoff.resolution_map[i] == 15 | ||
|
||
for i in bspline_config_larger_cutoff.interactions_map[3]: | ||
assert bspline_config_larger_cutoff.r_min_map[i] == [0,0,0] | ||
assert bspline_config_larger_cutoff.r_max_map[i] == [4, 4, 8] | ||
assert bspline_config_larger_cutoff.resolution_map[i] == [10,10,20] | ||
|
||
|
||
def test_get_possible_lower_cutoffs(self,bspline_config_larger_cutoff): | ||
cutoff_dict = optimize.get_possible_lower_cutoffs(bspline_config_larger_cutoff) | ||
|
||
assert np.allclose(cutoff_dict['rmax_2b_poss'],np.array([0.4, 0.8, 1.2,\ | ||
1.6, 2. , 2.4, 2.8, 3.2, 3.6, 4. , 4.4, 4.8, 5.2, 5.6, 6. ])) | ||
|
||
assert np.allclose(cutoff_dict['rmax_3b_poss'],np.array([0.8, 1.6, 2.4, 3.2, 4. ])) | ||
|
||
def test_drop_columns(self,bspline_config_larger_cutoff,Nb_Sn_chemistry, Nb3Sn_geom): | ||
|
||
bspline_handler_larger_cutoff = BasisFeaturizer(bspline_config_larger_cutoff) | ||
|
||
columns_to_drop_2b = optimize.get_columns_to_drop_2b(original_bspline_config=bspline_config_larger_cutoff, | ||
modify_2b_cutoff=4.4, | ||
knot_spacing=0.4) | ||
|
||
columns_to_drop_3b = optimize.get_columns_to_drop_3b(original_bspline_config=bspline_config_larger_cutoff, | ||
modify_3b_cutoff=3.2, | ||
knot_spacing=0.4) | ||
|
||
columns_to_drop = columns_to_drop_2b + columns_to_drop_3b | ||
|
||
bspline_config_smaller_cutoff = optimize.get_bspline_config(Nb_Sn_chemistry, rmin=0, | ||
rmax_2b=4.4, rmax_3b=3.2, | ||
knot_spacing=0.4) | ||
bspline_handler_smaller_cutoff = BasisFeaturizer(bspline_config_smaller_cutoff) | ||
|
||
assert len(bspline_handler_larger_cutoff.columns) - len(columns_to_drop) == len(bspline_handler_smaller_cutoff.columns) | ||
|
||
energy_feature_larger_cutoff = bspline_handler_larger_cutoff.evaluate_configuration(geom=Nb3Sn_geom,energy=-69.44185169) | ||
energy_feature_smaller_cutoff = bspline_handler_smaller_cutoff.evaluate_configuration(geom=Nb3Sn_geom,energy=-69.44185169) | ||
|
||
index_to_drop = np.where(np.isin(bspline_handler_larger_cutoff.columns,columns_to_drop))[0] | ||
energy_feature_from_larger_cutoff = np.delete(energy_feature_larger_cutoff['energy'],index_to_drop) | ||
|
||
assert np.allclose(energy_feature_smaller_cutoff['energy'],energy_feature_from_larger_cutoff) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,200 @@ | ||
""" | ||
This module provides functions for optimizing the cutoffs of 2-body and 3-body | ||
terms. If a feature file exist with a higher cutoff, features with lower cutoffs | ||
can be constructed by droping appropriate columns. The functions provided only | ||
work when used with uniform knot spacing. | ||
""" | ||
|
||
import numpy as np | ||
from uf3.representation import bspline | ||
|
||
def get_bspline_config(chemical_system, | ||
rmin: float, | ||
rmax_2b: float, | ||
rmax_3b: float, | ||
knot_spacing: float): | ||
""" | ||
Function for getting bspline_config object. We recommend using this function | ||
to get bspling_config object for- | ||
1. creating the HDF5 file with larger cutoffs | ||
2. fitting model with lower cutoffs | ||
The rmax_2b and rmax_3b values in the returned bspline_config object can be | ||
slightly different depending on the knot_spacing | ||
Args: | ||
chemical_system: chemical_system | ||
rmax_2b (float): Two-body cutoff | ||
rmax_3b (float): Three-body cutoff. If the 3-body interaction is A-B-C, then | ||
rmax_3b is the distance between A-B (or equivalently A-C) i.e the first or | ||
second term in the 3-body rmax of bspline_config. | ||
knot_spacing (float): knot_spacing used to create the HDF5 file | ||
Returns: | ||
bspline_config: | ||
""" | ||
if rmin!=0: | ||
raise ValueError("Currrent version is only tested for rmin=0") | ||
|
||
if (rmax_2b-rmin)%knot_spacing!=0: | ||
if not (np.isclose((rmax_2b-rmin)%knot_spacing,knot_spacing) or \ | ||
np.isclose((rmax_2b-rmin)%knot_spacing,0)): | ||
raise ValueError("Provided rmax_2b does not conatin integer number of \n\ | ||
knots, seperated by knot_spacing") | ||
|
||
if (rmax_3b-rmin)%knot_spacing!=0: | ||
if not (np.isclose((rmax_3b-rmin)%knot_spacing,knot_spacing) or \ | ||
np.isclose((rmax_3b-rmin)%knot_spacing,0)): | ||
raise ValueError("Provided rmax_3b does not conatin integer number of \n\ | ||
knots, seperated by knot_spacing") | ||
|
||
rmax_3b_double = rmax_3b*2 | ||
if not(np.isclose((rmax_3b_double%knot_spacing),0) or \ | ||
np.isclose((rmax_3b_double%knot_spacing),knot_spacing)): | ||
raise ValueError("Provided rmax_3b contains integer number of knots sperated \n\ | ||
by knot_spacing, but rmax_3b_double does not. Consider changing rmax_3b \n\ | ||
to "+str(rmax_3b+knot_spacing)) | ||
|
||
reso_2b = round((rmax_2b - rmin)/knot_spacing) | ||
reso_3b = round((rmax_3b - rmin)/knot_spacing) | ||
|
||
reso_3b_double = round(reso_3b*2) | ||
|
||
r_min_map = {i:rmin for i in chemical_system.interactions_map[2]} | ||
r_min_map.update({i:[rmin,rmin,rmin] for i in chemical_system.interactions_map[3]}) | ||
|
||
r_max_map = {i:rmax_2b for i in chemical_system.interactions_map[2]} | ||
r_max_map.update({i:[rmax_3b,rmax_3b,rmax_3b_double] for i in chemical_system.interactions_map[3]}) | ||
|
||
trailing_trim = 3 | ||
leading_trim = 0 | ||
|
||
resolution_map = {i:reso_2b for i in chemical_system.interactions_map[2]} | ||
resolution_map.update({i:[reso_3b,reso_3b,reso_3b_double] for i in chemical_system.interactions_map[3]}) | ||
|
||
bspline_config = bspline.BSplineBasis(chemical_system, r_min_map=r_min_map, | ||
r_max_map=r_max_map,resolution_map=resolution_map, | ||
trailing_trim=trailing_trim,leading_trim=leading_trim) | ||
|
||
return bspline_config | ||
|
||
|
||
def get_possible_lower_cutoffs(original_bspline_config): | ||
""" | ||
Function for getting cutoff values obtainable by droping columns of HDF5 file | ||
Args: | ||
original_bspline_config: bspline_config used to create the HDF5 feature file. | ||
This file was produced with larger cutoff. | ||
Returns: | ||
Dict: {"rmax_2b_poss":rmax_2b_poss, "rmax_3b_poss":rmax_3b_poss} | ||
rmax_2b_poss (list): List of possible 2-body cutoffs | ||
rmax_3b_poss (list): List of possible 3-body cutoffs | ||
""" | ||
interaction_2b = original_bspline_config.interactions_map[2][0] | ||
interaction_3b = original_bspline_config.interactions_map[3][0] | ||
|
||
rmax_2b_poss = original_bspline_config.knots_map[interaction_2b][4:-3] | ||
rmax_3b_poss = original_bspline_config.knots_map[interaction_3b][0][4:-3][1::2] | ||
|
||
for i in range(rmax_2b_poss.shape[0]): | ||
if rmax_2b_poss[i] not in original_bspline_config.knots_map[interaction_2b]: | ||
raise ValueError("Internal check failed-->2B!!") | ||
|
||
for i in range(rmax_3b_poss.shape[0]): | ||
if rmax_3b_poss[i] not in original_bspline_config.knots_map[interaction_3b][0]: | ||
raise ValueError("Internal check failed-->3B_0!!") | ||
|
||
for i in range(rmax_3b_poss.shape[0]): | ||
if rmax_3b_poss[i] not in original_bspline_config.knots_map[interaction_3b][1]: | ||
raise ValueError("Internal check failed-->3B_1!!") | ||
|
||
for i in range(rmax_3b_poss.shape[0]): | ||
if 2*rmax_3b_poss[i] not in original_bspline_config.knots_map[interaction_3b][2]: | ||
raise ValueError("Internal check failed-->3B_2!!") | ||
|
||
return {"rmax_2b_poss":rmax_2b_poss, "rmax_3b_poss":rmax_3b_poss} | ||
|
||
|
||
|
||
def get_columns_to_drop_2b(original_bspline_config, | ||
modify_2b_cutoff: float, | ||
knot_spacing: float): | ||
""" | ||
Function for getting appropriate 2-body feature columns to drop for fitting | ||
to lower cutoffs | ||
Args: | ||
original_bspline_config: bspline_config used to create the HDF5 feature file. | ||
This file was produced with larger cutoff. | ||
modify_2b_cutoff (float): Intended 2-body cutoff | ||
knot_spacing (float): knot_spacing used to create the HDF5 file | ||
Returns: | ||
columns_to_drop_2b (list): Should be passed to drop_columns argument of | ||
fit_from_file | ||
""" | ||
column_names = original_bspline_config.get_column_names() | ||
interaction_partitions_num = original_bspline_config.get_interaction_partitions()[0] | ||
interaction_partitions_posn = original_bspline_config.get_interaction_partitions()[1] | ||
|
||
columns_to_drop_2b = [] | ||
for interaction in original_bspline_config.interactions_map[2]: | ||
num_columns_to_drop_2b = round((original_bspline_config.knots_map[interaction][-4]-modify_2b_cutoff)/knot_spacing) | ||
|
||
start_ind_2b = 1+interaction_partitions_posn[interaction] | ||
end_ind_2b = start_ind_2b+interaction_partitions_num[interaction] | ||
columns_to_drop_2b.extend(column_names[end_ind_2b-num_columns_to_drop_2b-3:end_ind_2b-3]) | ||
return columns_to_drop_2b | ||
|
||
def get_columns_to_drop_3b(original_bspline_config, | ||
modify_3b_cutoff: float, | ||
knot_spacing: float): | ||
""" | ||
Function for getting appropriate 2-body feature columns to drop for fitting | ||
to lower cutoffs | ||
Args: | ||
original_bspline_config: bspline_config used to create the HDF5 feature file. | ||
This file was produced with larger cutoff. | ||
modify_3b_cutoff (float): Intended 3-body cutoff. If the 3-body interaction | ||
is A-B-C, then modify_3b_cutoff is the maximum distance between A-B (or A-C) | ||
i.e the first or second term in the 3-body rmax of bspline_config. | ||
knot_spacing (float): knot_spacing used to create the HDF5 file | ||
Returns: | ||
columns_to_drop_3b (list): Should be passed to drop_columns argument of | ||
fit_from_file | ||
""" | ||
column_names = original_bspline_config.get_column_names() | ||
interaction_partitions_num = original_bspline_config.get_interaction_partitions()[0] | ||
interaction_partitions_posn = original_bspline_config.get_interaction_partitions()[1] | ||
|
||
columns_to_drop_3b = [] | ||
for interaction in original_bspline_config.interactions_map[3]: | ||
num_columns_to_drop_3b = round((original_bspline_config.knots_map[interaction][0][-4]-modify_3b_cutoff)/knot_spacing) | ||
num_columns_to_drop_3b_double = int(num_columns_to_drop_3b*2) | ||
start_ind_3b = 1+interaction_partitions_posn[interaction] | ||
end_ind_3b = start_ind_3b + interaction_partitions_num[interaction] | ||
|
||
l_space, m_space, n_space = original_bspline_config.knots_map[interaction] | ||
L = len(l_space) - 4 | ||
M = len(m_space) - 4 | ||
N = len(n_space) - 4 | ||
grid_c = np.chararray((L, M, N)) | ||
grid_c[:] = '0' | ||
grid_c = np.char.multiply(grid_c,16) | ||
grid_c.flat[original_bspline_config.template_mask[interaction]] = column_names[start_ind_3b:end_ind_3b] | ||
|
||
grid_c = np.delete(grid_c,np.s_[grid_c.shape[2]-3-num_columns_to_drop_3b_double:grid_c.shape[2]-3],axis=2) #4 | ||
grid_c = np.delete(grid_c,np.s_[grid_c.shape[1]-3-num_columns_to_drop_3b:grid_c.shape[1]-3],axis=1) #2 | ||
grid_c = np.delete(grid_c,np.s_[grid_c.shape[0]-3-num_columns_to_drop_3b:grid_c.shape[0]-3],axis=0) #2 | ||
|
||
columns_to_keep = grid_c[grid_c[:] != b'0000000000000000'].astype('<U16') | ||
|
||
columns_to_keep_index_3b = np.where(np.isin(column_names[start_ind_3b:end_ind_3b],columns_to_keep))[0] | ||
|
||
column_to_drop = np.setdiff1d(column_names[start_ind_3b:end_ind_3b],columns_to_keep) | ||
column_to_drop_index = np.where(np.isin(column_names[start_ind_3b:end_ind_3b],column_to_drop))[0] | ||
|
||
columns_to_drop_3b.extend(column_to_drop) | ||
return columns_to_drop_3b |