forked from joezuntz/cosmosis-standard-library
Add SN likelihood to support sim data.
""" | ||
The likelihood module for the DES Y5 SN simulated dataset | ||
author: Sujeong Lee | ||
""" | ||
|
||
from cosmosis.gaussian_likelihood import GaussianLikelihood | ||
from cosmosis.datablock import names | ||
import os | ||
import numpy as np | ||
import pandas as pd | ||

# Default is to use DES-only SN. To use DES+LOWZ SN, set the data_file
# and covmat_file parameters in the ini file.
# The default simulated data vector and covariance were generated by Ryan Camilleri.
default_data_file = os.path.join(os.path.split(__file__)[0], "DESONLY/hubble_diagram.txt")
default_covmat_file = os.path.join(os.path.split(__file__)[0], "DESONLY/covsys_000.txt")
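
# A minimal sketch of the corresponding ini-file usage (the section name and
# module path below are illustrative assumptions, not fixed by this file):
#
#   [desy5sn]
#   file = likelihood/des_y5_sn/sn_likelihood.py
#   ; optional overrides, e.g. for DES+LOWZ:
#   ; data_file = /path/to/hubble_diagram.txt
#   ; covmat_file = /path/to/covsys_000.txt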


def cov_log_likelihood(mu_model, mu, inv_cov):
    """
    From Ryan Camilleri.
    Computes a modified likelihood that analytically marginalizes over the
    offset M; see https://arxiv.org/abs/astro-ph/0104009v1, Equations A9-A12.
    """
    delta = np.array([mu_model - mu])
    deltaT = np.transpose(delta)
    chit2 = np.sum(delta @ inv_cov @ deltaT)
    B = np.sum(delta @ inv_cov)
    C = np.sum(inv_cov)
    chi2 = chit2 - (B**2 / C) + np.log(C / (2 * np.pi))
    return -0.5 * chi2
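
# For reference, the statistic implemented above (Goliath et al. 2001,
# Eqs. A9-A12), writing Delta = mu_model - mu and S = inv_cov, is
#
#   chi^2 = Delta^T S Delta - B^2 / E + ln(E / (2*pi)),
#   with B = sum_i (Delta^T S)_i  and  E = sum_ij S_ij,
#
# i.e. the Gaussian chi^2 with the constant offset M integrated out under a
# flat prior, including the resulting log-normalization term.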


class DESY5SNLikelihood(GaussianLikelihood):
    x_section = names.distances
    x_name = "z"
    y_section = names.distances
    y_name = "D_A"
    like_name = "desy5sn"
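
    # The attributes above tell the GaussianLikelihood parent class where in
    # the datablock to find the theory curve: the redshift grid and angular
    # diameter distance computed upstream by a background/distance module.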

    def build_data(self):
        """
        Run once at the start to load in the data vectors.
        Returns x, y where x is the independent variable (redshift in this case)
        and y is the Gaussian-distributed measured variable (distance modulus
        in this case).
        """
        filename = self.options.get_string("data_file", default=default_data_file)
        print("Loading DES Y5 SN data from {}".format(filename))
        data = pd.read_csv(filename, delim_whitespace=True, comment='#')
        self.origlen = len(data)

        # The only columns that we actually need here are the redshift,
        # distance modulus, and distance modulus error
        self.ww = (data['zHD'] > 0.00)
        # use the vpec-corrected redshift for zCMB
        self.zCMB = data['zHD'][self.ww]
        self.zHEL = data['zHEL'][self.ww]
        # The DES Y5 simulated SN data provides the distance modulus directly
        self.mu_obs = data['MU'][self.ww]
        self.mu_obs_err = data['MUERR'][self.ww]

        # Return this to the parent class, which will use it
        # when working out the likelihood
        print(f"Found {len(self.zCMB)} DES Y5 supernovae (or bins if you used the binned data file)")
        return self.zCMB, self.mu_obs
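
    # For reference, hubble_diagram.txt is read as whitespace-delimited text
    # with a header row; only the zHD, zHEL, MU and MUERR columns are used
    # here.  An illustrative layout (values made up; the real file may carry
    # additional columns):
    #
    #   zHD    zHEL   MU      MUERR
    #   0.025  0.024  35.11   0.12
    #   0.051  0.051  36.73   0.10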

    def build_covariance(self):
        """Run once at the start to build the covariance matrix for the data"""
        filename = self.options.get_string("covmat_file", default=default_covmat_file)
        print("Loading DES Y5 SN covariance from {}".format(filename))

        # The file format for the covariance has the first line as an integer
        # giving the dimension n of the matrix, with the subsequent n*n lines
        # being the elements.
        # This file is just the systematic component of the covariance -
        # we also need to add in the statistical error on the magnitudes
        # that we loaded earlier.
        with open(filename) as f:
            n = int(f.readline())
            C = np.zeros((n, n))
            for i in range(n):
                for j in range(n):
                    C[i, j] = float(f.readline())

        # Apply the same redshift cut as was applied to the data vector
        C = C[self.ww][:, self.ww]

        # Now add the statistical error to the diagonal; this is done after
        # the cut so that the errors line up with the rows that were kept.
        mu_err = np.asarray(self.mu_obs_err)
        for i in range(len(mu_err)):
            C[i, i] += mu_err[i]**2

        # Return the covariance; the parent class knows to invert this
        # later to get the precision matrix that we need for the likelihood.
        return C
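
    # For reference, an illustrative covsys file for n = 2 would read (one
    # number per line: the dimension, then the n*n elements in row-major
    # order):
    #
    #   2
    #   0.0100
    #   0.0010
    #   0.0010
    #   0.0200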

    def extract_theory_points(self, block):
        """
        Run once per parameter set to extract the theory vector that our data
        points are compared to: the predicted distance modulus evaluated at
        the redshifts of the supernovae.
        """
        import scipy.interpolate

        # Pull out the theory D_A and z from the block.
        theory_x = block[self.x_section, self.x_name]
        theory_y = block[self.y_section, self.y_name]

        # Interpolation function of the theory, so we can evaluate it
        # at the redshifts of the data
        f = scipy.interpolate.interp1d(theory_x, theory_y, kind=self.kind)

        zcmb = self.zCMB
        zhel = self.zHEL

        # distance modulus, converting D_A to a luminosity distance with the
        # heliocentric correction
        theory_ynew = 5.0 * np.log10((1.0 + zcmb) * (1.0 + zhel) * np.atleast_1d(f(zcmb))) + 25.0

        # This offset M will be marginalized over in the modified
        # log-likelihood computation
        M = block[names.supernova_params, "M"]
        return theory_ynew + M
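
    # For reference, the relation used above: with D_A in Mpc,
    #
    #   d_L = (1 + z_cmb) * (1 + z_hel) * D_A(z_cmb)
    #   mu  = 5 * log10(d_L / Mpc) + 25
    #
    # which combines d_L = (1+z)^2 * D_A with the standard replacement of one
    # factor of (1 + z_cmb) by the heliocentric (1 + z_hel).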

    def do_likelihood(self, block):
        # get the theory prediction at the data redshifts by interpolation
        x = np.atleast_1d(self.extract_theory_points(block))
        mu = np.atleast_1d(self.data_y)

        # If the covariance is a function of parameters, compute the
        # new one now.
        if not self.constant_covariance:
            self.cov = np.atleast_2d(self.extract_covariance(block))
            self.inv_cov = np.atleast_2d(self.extract_inverse_covariance(block))

        # The plain Gaussian likelihood this replaces would have been:
        #   d = x - mu
        #   chi2 = float(np.einsum('i,ij,j', d, self.inv_cov, d))
        #   like = -0.5 * chi2

        # SJ edit: use the modified log-likelihood computation that
        # marginalizes over the offset M instead
        like = cov_log_likelihood(x, mu, self.inv_cov)
        chi2 = -2.0 * like

        # It can be useful to save the chi^2 as well as the likelihood,
        # especially when the covariance is non-constant.
        block[names.data_vector, self.like_name + "_CHI2"] = chi2
        block[names.data_vector, self.like_name + "_N"] = mu.size

        # If the covariance is a function of parameters then we must
        # account for this in the likelihood.
        if not self.constant_covariance:
            log_det = self.extract_covariance_log_determinant(block)
        else:
            log_det = self.log_det_constant

        norm = -0.5 * log_det
        like += norm
        block[names.data_vector, self.like_name + "_LOG_DET"] = float(log_det)
        block[names.data_vector, self.like_name + "_NORM"] = float(norm)

        # Numpy has started returning a 0D array in recent versions (1.14).
        # Convert this to a float.
        like = float(like)

        # Now save the resulting likelihood
        block[names.likelihoods, self.like_name + "_LIKE"] = like

        # For some very fast likelihoods the overhead from the steps below
        # is painful. Setting likelihood_only avoids that.
        if self.likelihood_only:
            return

        # Also save the predicted data points - the vector of observables
        # whose derivatives we want in a Fisher approach - and the inverse
        # covariance matrix, which also goes into the Fisher matrix.
        block[names.data_vector, self.like_name + "_theory"] = x
        block[names.data_vector, self.like_name + "_data"] = mu
        block[names.data_vector, self.like_name + "_inverse_covariance"] = self.inv_cov

        # We might just be calculating the inverse covariance and ignoring
        # the covariance matrix itself; in that case we do not try to save it.
        if self.cov is not None:
            block[names.data_vector, self.like_name + "_covariance"] = self.cov
            # Also save a simulation of the data - the mean with added noise.
            # These can be used, among other places, by the ABC sampler.
            # This also requires the covariance matrix.
            # If we have a parameter-dependent covariance we need to
            # re-calculate the Cholesky decomposition to simulate the data.
            if not self.constant_covariance:
                self.chol = np.linalg.cholesky(self.cov)
            sim = self.simulate_data_vector(x)
            block[names.data_vector, self.like_name + "_simulation"] = sim


# This takes our class and turns it into the setup, execute, and cleanup
# functions that cosmosis expects from a module.
setup, execute, cleanup = DESY5SNLikelihood.build_module()
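

# A quick numerical sanity check of cov_log_likelihood - an illustrative
# sketch, not part of the module's normal operation.  The analytic
# marginalization over M should match -2*log of a brute-force flat-prior
# integral over the offset.
if __name__ == "__main__":
    rng = np.random.default_rng(42)
    nsn = 5
    cov = np.diag(rng.uniform(0.01, 0.05, nsn))
    inv_cov = np.linalg.inv(cov)
    mu = rng.normal(36.0, 1.0, nsn)
    mu_model = mu + rng.normal(0.0, 0.1, nsn) + 0.3  # model offset by a constant

    analytic = -2.0 * cov_log_likelihood(mu_model, mu, inv_cov)

    # Brute force: chi^2(M) on a fine, wide grid of offsets, then integrate
    # exp(-0.5*chi^2) over M (simple rectangle rule, stabilized by chi2_min)
    Ms = np.linspace(-5.0, 5.0, 20001)
    deltas = (mu_model - mu)[None, :] - Ms[:, None]
    chi2s = np.einsum('mi,ij,mj->m', deltas, inv_cov, deltas)
    chi2_min = chi2s.min()
    integral = np.exp(-0.5 * (chi2s - chi2_min)).sum() * (Ms[1] - Ms[0])
    numeric = chi2_min - 2.0 * np.log(integral)

    print("analytic:", analytic, " numeric:", numeric)  # should agree closely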