From 95ad6df63d33510ec867ffc46c564e02520ccb1e Mon Sep 17 00:00:00 2001 From: Justin Date: Fri, 15 Nov 2024 23:15:24 -0700 Subject: [PATCH 1/6] refactor correlated_values and correlated_values_norm --- uncertainties/core.py | 136 ++++++++++++++++++++++-------------------- 1 file changed, 72 insertions(+), 64 deletions(-) diff --git a/uncertainties/core.py b/uncertainties/core.py index c521a87f..f5e14ffb 100644 --- a/uncertainties/core.py +++ b/uncertainties/core.py @@ -107,29 +107,74 @@ def correlated_values(nom_values, covariance_mat, tags=None): "correlated_values is unavailable." ) raise NotImplementedError(msg) - # !!! It would in principle be possible to handle 0 variance - # variables by first selecting the sub-matrix that does not contain - # such variables (with the help of numpy.ix_()), and creating - # them separately. - - std_devs = numpy.sqrt(numpy.diag(covariance_mat)) - - # For numerical stability reasons, we go through the correlation - # matrix, because it is insensitive to any change of scale in the - # quantities returned. However, care must be taken with 0 variance - # variables: calculating the correlation matrix cannot be simply done - # by dividing by standard deviations. We thus use specific - # normalization values, with no null value: - norm_vector = std_devs.copy() - norm_vector[norm_vector == 0] = 1 - - return correlated_values_norm( - # !! The following zip() is a bit suboptimal: correlated_values() - # separates back the nominal values and the standard deviations: - list(zip(nom_values, std_devs)), - covariance_mat / norm_vector / norm_vector[:, numpy.newaxis], - tags, - ) + covariance_mat = numpy.array(covariance_mat) + + if not numpy.all(numpy.isreal(covariance_mat)): + raise ValueError("covariance_mat must be real.") + if not numpy.all( + numpy.isclose( + covariance_mat, + numpy.transpose(covariance_mat), + ) + ): + raise ValueError("covariance_mat must be symmetric.") + + n_dims = covariance_mat.shape[0] + if tags is None: + tags = [None] * n_dims + + ufloat_atoms = numpy.array([Variable(0, 1, tags[dim]) for dim in range(n_dims)]) + + try: + """ + Covariance matrices for linearly independent random variables are + symmetric and positive-definite so they can be decomposed sa + C = L * L.T + + with L a lower triangular matrix. + Let R be a vector of independent random variables with zero mean and + unity variance. Then consider + Y = L * R + and + Cov(Y) = E[Y * Y.T] = E[L * R * R.T * L.T] = L * E[R * R.t] * L.T + = L * Cov(R) * L.T = L * I * L.T = L * L.T = C + where Cov(R) = I because the random variables in V are independent with + unity variance. So Y defined as above has covariance C. + """ + L = numpy.linalg.cholesky(covariance_mat) + Y = L @ ufloat_atoms + except numpy.linalg.LinAlgError: + """" + If two random variables are linearly dependent, e.g. x and y=2*x, then + their covariance matrix will be degenerate. In this case, a Cholesky + decomposition is not possible, but an eigenvalue decomposition is. Even + in this case, covariance matrices are symmetric, so they can be + decomposed as + + C = U V U^T + + with U orthogonal and V diagonal with non-negative (though possibly + zero-valued) entries. Let S = sqrt(V) and + Y = U * S * R + Then + Cov(Y) = E[Y * Y.T] = E[U * S * R * R.T * S.T * U.T] + = U * S * E[R * R.T] * S.T * U.T + = U * S * I * S.T * U.T + = U * S * S.T * U.T = U * V * U.T + = C + So Y defined as above has covariance C. + """ + eig_vals, eig_vecs = numpy.linalg.eigh(covariance_mat) + """ + Eigenvalues may be close to zero but negative. We clip these + to zero. + """ + eig_vals = numpy.clip(eig_vals, a_min=0, a_max=None) + std_devs = numpy.diag(numpy.sqrt(eig_vals)) + Y = numpy.transpose(eig_vecs @ std_devs @ ufloat_atoms) + + result = numpy.array(nom_values) + Y + return result def correlated_values_norm(values_with_std_dev, correlation_mat, tags=None): @@ -165,49 +210,12 @@ def correlated_values_norm(values_with_std_dev, correlation_mat, tags=None): ) raise NotImplementedError(msg) - # If no tags were given, we prepare tags for the newly created - # variables: - if tags is None: - tags = (None,) * len(values_with_std_dev) - - (nominal_values, std_devs) = numpy.transpose(values_with_std_dev) - - # We diagonalize the correlation matrix instead of the - # covariance matrix, because this is generally more stable - # numerically. In fact, the covariance matrix can have - # coefficients with arbitrary values, through changes of units - # of its input variables. This creates numerical instabilities. - # - # The covariance matrix is diagonalized in order to define - # the independent variables that model the given values: - (variances, transform) = numpy.linalg.eigh(correlation_mat) - - # Numerical errors might make some variances negative: we set - # them to zero: - variances[variances < 0] = 0.0 - - # Creation of new, independent variables: + nominal_values, std_devs = tuple(zip(*values_with_std_dev)) - # We use the fact that the eigenvectors in 'transform' are - # special: 'transform' is unitary: its inverse is its transpose: - - variables = tuple( - # The variables represent "pure" uncertainties: - Variable(0, sqrt(variance), tag) - for (variance, tag) in zip(variances, tags) - ) - - # The coordinates of each new uncertainty as a function of the - # new variables must include the variable scale (standard deviation): - transform *= std_devs[:, numpy.newaxis] - - # Representation of the initial correlated values: - values_funcs = tuple( - AffineScalarFunc(value, LinearCombination(dict(zip(variables, coords)))) - for (coords, value) in zip(transform, nominal_values) - ) + outer_std_devs = numpy.outer(std_devs, std_devs) + cov_mat = correlation_mat * outer_std_devs - return values_funcs + return correlated_values(nominal_values, cov_mat) def correlation_matrix(nums_with_uncert): From 004d3234f416d838e23eee8bce0ce38a6bc1df1a Mon Sep 17 00:00:00 2001 From: Justin Date: Sat, 16 Nov 2024 00:05:14 -0700 Subject: [PATCH 2/6] docstrings --- uncertainties/core.py | 144 +++++++++++++++++++++--------------------- 1 file changed, 72 insertions(+), 72 deletions(-) diff --git a/uncertainties/core.py b/uncertainties/core.py index f5e14ffb..b0df3395 100644 --- a/uncertainties/core.py +++ b/uncertainties/core.py @@ -74,39 +74,27 @@ def correlated_values(nom_values, covariance_mat, tags=None): """ - Return numbers with uncertainties (AffineScalarFunc objects) - that correctly reproduce the given covariance matrix, and have - the given (float) values as their nominal value. + Return numbers with uncertainties (AffineScalarFunc objects) that have nominal + values given by nom_values and are correlated according to covariance_mat. - The correlated_values_norm() function returns the same result, - but takes a correlation matrix instead of a covariance matrix. + nom_values -- length (N,) sequence of (real) nominal values for the returned numbers + with uncertainties. - The list of values and the covariance matrix must have the - same length, and the matrix must be a square (symmetric) one. + covariance_mat -- (N, N) array representing the target covariance matrix for the + returned numbers with uncertainties. This matrix must be positive semi-definite. - The numbers with uncertainties returned depend on newly - created, independent variables (Variable objects). + tags -- optional length (N,) sequence of strings to use as tags for the variables on + which the returned numbers with uncertainties depend. - nom_values -- sequence with the nominal (real) values of the - numbers with uncertainties to be returned. - - covariance_mat -- full covariance matrix of the returned numbers with - uncertainties. For example, the first element of this matrix is the - variance of the first number with uncertainty. This matrix must be a - NumPy array-like (list of lists, NumPy array, etc.). - - tags -- if 'tags' is not None, it must list the tag of each new - independent variable. - - This function raises NotImplementedError if numpy cannot be - imported. + This function raises NotImplementedError if numpy cannot be imported. """ if numpy is None: msg = ( - "uncertainties was not able to import numpy so " - "correlated_values is unavailable." + "uncertainties was not able to import numpy so correlated_values is " + "unavailable." ) raise NotImplementedError(msg) + covariance_mat = numpy.array(covariance_mat) if not numpy.all(numpy.isreal(covariance_mat)): @@ -123,55 +111,64 @@ def correlated_values(nom_values, covariance_mat, tags=None): if tags is None: tags = [None] * n_dims - ufloat_atoms = numpy.array([Variable(0, 1, tags[dim]) for dim in range(n_dims)]) + X = numpy.array([Variable(0, 1, tags[dim]) for dim in range(n_dims)]) try: """ - Covariance matrices for linearly independent random variables are - symmetric and positive-definite so they can be decomposed sa + Covariance matrices for linearly independent random variables are symmetric and + positive-definite so they can be Cholesky decomposed as + C = L * L.T - with L a lower triangular matrix. - Let R be a vector of independent random variables with zero mean and - unity variance. Then consider - Y = L * R + with L a lower triangular matrix. Let X be a vector of independent random + variables with zero mean and unity variance. Then consider + + Y = L * X + and - Cov(Y) = E[Y * Y.T] = E[L * R * R.T * L.T] = L * E[R * R.t] * L.T - = L * Cov(R) * L.T = L * I * L.T = L * L.T = C - where Cov(R) = I because the random variables in V are independent with - unity variance. So Y defined as above has covariance C. + + Cov(Y) = E[Y * Y.T] = E[L * X * X.T * L.T] = L * E[X * X.t] * L.T + = L * Cov(X) * L.T = L * I * L.T = L * L.T = C + + where Cov(X) = I because the random variables in X are independent with + unity variance. So Y = L * X has covariance C. """ L = numpy.linalg.cholesky(covariance_mat) - Y = L @ ufloat_atoms + Y = L @ X except numpy.linalg.LinAlgError: """" - If two random variables are linearly dependent, e.g. x and y=2*x, then - their covariance matrix will be degenerate. In this case, a Cholesky - decomposition is not possible, but an eigenvalue decomposition is. Even - in this case, covariance matrices are symmetric, so they can be - decomposed as + If two random variables are linearly dependent, e.g. x and y=2*x, then their + covariance matrix will be degenerate. In this case, a Cholesky decomposition is + not possible, but an eigenvalue decomposition is. Even in this case, covariance + matrices are symmetric, so they can be decomposed as - C = U V U^T + C = U D U^T + + with U orthogonal and D diagonal with non-negative (though possibly + zero-valued) entries. Let S = sqrt(D) and + + Y = U * S * X - with U orthogonal and V diagonal with non-negative (though possibly - zero-valued) entries. Let S = sqrt(V) and - Y = U * S * R Then - Cov(Y) = E[Y * Y.T] = E[U * S * R * R.T * S.T * U.T] - = U * S * E[R * R.T] * S.T * U.T + + Cov(Y) = E[Y * Y.T] = E[U * S * X * X.T * S.T * U.T] + = U * S * E[X * X.T] * S.T * U.T = U * S * I * S.T * U.T - = U * S * S.T * U.T = U * V * U.T + = U * S * S.T * U.T = U * D * U.T = C - So Y defined as above has covariance C. + + So Y = U * S * X defined as above has covariance C. """ - eig_vals, eig_vecs = numpy.linalg.eigh(covariance_mat) + eig_vals, U = numpy.linalg.eigh(covariance_mat) """ Eigenvalues may be close to zero but negative. We clip these to zero. """ eig_vals = numpy.clip(eig_vals, a_min=0, a_max=None) - std_devs = numpy.diag(numpy.sqrt(eig_vals)) - Y = numpy.transpose(eig_vecs @ std_devs @ ufloat_atoms) + if numpy.any(eig_vals < 0): + raise ValueError("covariance_mat must be positive semi-definite.") + S = numpy.diag(numpy.sqrt(eig_vals)) + Y = numpy.transpose(U @ S @ X) result = numpy.array(nom_values) + Y return result @@ -179,29 +176,22 @@ def correlated_values(nom_values, covariance_mat, tags=None): def correlated_values_norm(values_with_std_dev, correlation_mat, tags=None): """ - Return correlated values like correlated_values(), but takes - instead as input: - - - nominal (float) values along with their standard deviation, and - - a correlation matrix (i.e. a normalized covariance matrix). + Return numbers with uncertainties (AffineScalarFunc objects) that have nominal + values and standard deviations given by values_with_std_dev and whose correlation + matrix is given by correlation_mat. - values_with_std_dev -- sequence of (nominal value, standard - deviation) pairs. The returned, correlated values have these - nominal values and standard deviations. + values_with_std_dev -- Length (N,) sequence of tuples of (nominal_value, std_dev) + corresponding to the nominal values and standard deviations of the returned numbers + with uncertainty. std_devs must be non-negative. - correlation_mat -- correlation matrix between the given values, except - that any value with a 0 standard deviation must have its correlations - set to 0, with a diagonal element set to an arbitrary value (something - close to 0-1 is recommended, for a better numerical precision). When - no value has a 0 variance, this is the covariance matrix normalized by - standard deviations, and thus a symmetric matrix with ones on its - diagonal. This matrix must be an NumPy array-like (list of lists, - NumPy array, etc.). + correlation_mat -- (N, N) array representing the target normalized correlation + matrix between the returned values with uncertainty. This matrix must be positive + semi-definite. - tags -- like for correlated_values(). + tags -- optional length (N,) sequence of strings to use as tags for the variables on + which the returned numbers with uncertainties depend. - This function raises NotImplementedError if numpy cannot be - imported. + This function raises NotImplementedError if numpy cannot be imported. """ if numpy is None: msg = ( @@ -211,11 +201,21 @@ def correlated_values_norm(values_with_std_dev, correlation_mat, tags=None): raise NotImplementedError(msg) nominal_values, std_devs = tuple(zip(*values_with_std_dev)) + if any(numpy.array(std_devs) < 0): + raise ValueError("All std_devs must be non-negative.") + + """ + The entries Corr[i, j] of the correlation matrix are related to the entries + Cov[i, j] by + Cov[i, j] = Corr[i, j] * s[i] * s[j] + + where s[i] is the standard deviation of random variable i. + """ outer_std_devs = numpy.outer(std_devs, std_devs) cov_mat = correlation_mat * outer_std_devs - return correlated_values(nominal_values, cov_mat) + return correlated_values(nominal_values, cov_mat, tags) def correlation_matrix(nums_with_uncert): From 418326bbf5a4b6c0473095b4a7c33b297b9a3510 Mon Sep 17 00:00:00 2001 From: Justin Date: Sat, 16 Nov 2024 00:15:31 -0700 Subject: [PATCH 3/6] test error cases --- tests/test_uncertainties.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/test_uncertainties.py b/tests/test_uncertainties.py index 0cb81807..20cbc685 100644 --- a/tests/test_uncertainties.py +++ b/tests/test_uncertainties.py @@ -1278,6 +1278,28 @@ def test_correlated_values(): assert uarrays_close(cov, numpy.array(uncert_core.covariance_matrix(variables))) + nom_values = [0, 0, 0] + covariance_mat = numpy.diag([1, 1, -1]) + with pytest.raises( + ValueError, + match="must be positive semi-definite.", + ): + x, y, z = correlated_values(nom_values, covariance_mat) + + covariance_mat = numpy.array([[1, 0, 1], [0, 1, 0], [0, 0, 1]]) + with pytest.raises( + ValueError, + match="must be symmetric.", + ): + x, y, z = correlated_values(nom_values, covariance_mat) + + covariance_mat = numpy.array([[1, 0, 0], [0, 1j, 0], [0, 0, 1]]) + with pytest.raises( + ValueError, + match="must be real.", + ): + x, y, z = correlated_values(nom_values, covariance_mat) + def test_correlated_values_correlation_mat(): """ Tests the input of correlated value. @@ -1327,6 +1349,11 @@ def test_correlated_values_correlation_mat(): numpy.array(uncert_core.covariance_matrix([x2, y2, z2])), ) + values_with_std_dev = ((0, 1), (0, 1), (0, -1)) + correlation_mat = np.diag((1, 1, 1)) + with pytest.raises(ValueError, match="must be non-negative."): + x, y, z = correlated_values_norm(values_with_std_dev, correlation_mat) + @pytest.mark.skipif( np is not None, From 64afcfb006d12e8088e63944f6c71ed23cedab7d Mon Sep 17 00:00:00 2001 From: Justin Date: Sat, 16 Nov 2024 00:15:45 -0700 Subject: [PATCH 4/6] fix negative eigenvalue handling --- uncertainties/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uncertainties/core.py b/uncertainties/core.py index b0df3395..b54b120e 100644 --- a/uncertainties/core.py +++ b/uncertainties/core.py @@ -160,11 +160,11 @@ def correlated_values(nom_values, covariance_mat, tags=None): So Y = U * S * X defined as above has covariance C. """ eig_vals, U = numpy.linalg.eigh(covariance_mat) + eig_vals[numpy.isclose(eig_vals, 0)] = 0 """ Eigenvalues may be close to zero but negative. We clip these to zero. """ - eig_vals = numpy.clip(eig_vals, a_min=0, a_max=None) if numpy.any(eig_vals < 0): raise ValueError("covariance_mat must be positive semi-definite.") S = numpy.diag(numpy.sqrt(eig_vals)) From e339f37ede38ad88f78edc1d4bbdff4652be97c2 Mon Sep 17 00:00:00 2001 From: Justin Date: Sat, 16 Nov 2024 00:22:42 -0700 Subject: [PATCH 5/6] Changelog --- CHANGES.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index c9263ab6..07e6ca2d 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -6,12 +6,17 @@ Unreleased Changes +- Use a Cholesky decomposition to calculate values with uncertainty using + `correlated_values` and `correlated_values_norm` when the provided covariance matrix + or correlation matrix is positive definite. If the provided matrix is strictly + positive semi-definite an eigenvalue decomposition is still used. Refactor the code + and clean up the documentation for these functions. (#99) - Changed how `numpy` is handled as an optional dependency. Previously, importing a `numpy`-dependent function, like `correlated_values`, without `numpy` installed would result in an `ImportError` at import time. Now such a function can be imported but if the user attempts to execute it, a `NotImplementedError` is raised indicating that the - function can't be used because `numpy` couldn't be imported. + function can't be used because `numpy` couldn't be imported. (#267) Fixes: From 2f7d1b566c85041aa9e27008939a717adbb4f3bd Mon Sep 17 00:00:00 2001 From: Justin Date: Sat, 16 Nov 2024 00:24:41 -0700 Subject: [PATCH 6/6] changelog --- CHANGES.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 07e6ca2d..3d9e8547 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -9,8 +9,10 @@ Changes - Use a Cholesky decomposition to calculate values with uncertainty using `correlated_values` and `correlated_values_norm` when the provided covariance matrix or correlation matrix is positive definite. If the provided matrix is strictly - positive semi-definite an eigenvalue decomposition is still used. Refactor the code - and clean up the documentation for these functions. (#99) + positive semi-definite an eigenvalue decomposition is still used. Explicit exceptions + are raised if the user-provided matrix is non-real, non-symmetric, or non positive + semi-definite. The code and documentation for the two functions has been cleaned up. + (#99) - Changed how `numpy` is handled as an optional dependency. Previously, importing a `numpy`-dependent function, like `correlated_values`, without `numpy` installed would result in an `ImportError` at import