Add SN likelihood to support sim data.
aferte committed May 28, 2024
1 parent daa6634 commit 4725e57
Showing 1 changed file with 202 additions and 0 deletions.
202 changes: 202 additions & 0 deletions likelihood/desy5sn/desy5sn_sim.py
@@ -0,0 +1,202 @@
"""
The likelihood module for the DES Y5 SN simulated dataset
author: Sujeong Lee
"""

from cosmosis.gaussian_likelihood import GaussianLikelihood
from cosmosis.datablock import names
import os
import numpy as np
import pandas as pd

# The default is to use DES-only SN. To use DES+LOWZ SN, set the data_file
# and covmat_file parameters in the ini file.
# The default simulated data vector and covariance were generated by Ryan Camilleri.
default_data_file = os.path.join(os.path.split(__file__)[0], "DESONLY/hubble_diagram.txt")
default_covmat_file = os.path.join(os.path.split(__file__)[0], "DESONLY/covsys_000.txt")
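
# A hedged sketch of what the corresponding entry in a cosmosis pipeline ini
# file might look like; the section name and paths are illustrative, only the
# data_file and covmat_file option names are fixed by this module:
#
# [desy5sn_sim]
# file = likelihood/desy5sn/desy5sn_sim.py
# data_file = likelihood/desy5sn/DESONLY/hubble_diagram.txt
# covmat_file = likelihood/desy5sn/DESONLY/covsys_000.txt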

def cov_log_likelihood(mu_model, mu, inv_cov):
    """
    From Ryan Camilleri.
    Computes the modified log-likelihood that analytically marginalizes over
    the offset M, following https://arxiv.org/abs/astro-ph/0104009v1,
    Equations A9-A12.
    """
    delta = np.array([mu_model - mu])
    deltaT = np.transpose(delta)
    chit2 = np.sum(delta @ inv_cov @ deltaT)
    B = np.sum(delta @ inv_cov)
    C = np.sum(inv_cov)
    chi2 = chit2 - (B**2 / C) + np.log(C / (2 * np.pi))
    return -0.5 * chi2
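
# Illustrative only, not called anywhere in this module: a quick numerical
# check that cov_log_likelihood is insensitive to a constant offset added to
# the model vector, which is exactly what marginalizing over M buys us.
# The toy data and covariance below are made up for the demonstration.
def _check_offset_invariance():
    rng = np.random.default_rng(42)
    mu = rng.normal(20.0, 1.0, size=10)
    mu_model = mu + rng.normal(0.0, 0.1, size=10)
    inv_cov = np.linalg.inv(0.01 * np.eye(10))
    like_original = cov_log_likelihood(mu_model, mu, inv_cov)
    like_shifted = cov_log_likelihood(mu_model + 3.0, mu, inv_cov)
    assert np.isclose(like_original, like_shifted)
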

class DESY5SNLikelihood(GaussianLikelihood):
    x_section = names.distances
    x_name = "z"
    y_section = names.distances
    y_name = "D_A"
    like_name = "desy5sn"

    def build_data(self):
        """
        Run once at the start to load in the data vectors.
        Returns x, y where x is the independent variable (redshift in this case)
        and y is the Gaussian-distributed measured variable (distance modulus
        in this case).
        """
        filename = self.options.get_string("data_file", default=default_data_file)
        print("Loading DES Y5 SN data from {}".format(filename))
        data = pd.read_csv(filename, delim_whitespace=True, comment='#')
        self.origlen = len(data)
        # The only columns that we actually need here are the redshift,
        # distance modulus and distance modulus error.

        self.ww = (data['zHD'] > 0.00)
        # use the vpec-corrected redshift for zCMB
        self.zCMB = data['zHD'][self.ww]
        self.zHEL = data['zHEL'][self.ww]
        # distance modulus
        self.mu_obs = data['MU'][self.ww]

        # The DES Y5 simulated SN data provides the distance modulus directly.
        self.mu_obs_err = data['MUERR'][self.ww]

        # Return this to the parent class, which will use it
        # when working out the likelihood.
        print(f"Found {len(self.zCMB)} DES Y5 supernovae (or bins if you used the binned data file)")
        return self.zCMB, self.mu_obs
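
    # For reference, a hedged sketch of the whitespace-separated columns that
    # build_data reads from hubble_diagram.txt; the values below are made up
    # for illustration and the real file may contain additional columns:
    #
    #   zHD     zHEL    MU      MUERR
    #   0.2500  0.2496  40.51   0.11
    #   0.6100  0.6093  42.73   0.10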

    def build_covariance(self):
        """Run once at the start to build the covariance matrix for the data."""
        filename = self.options.get_string("covmat_file", default=default_covmat_file)
        print("Loading DES Y5 SN covariance from {}".format(filename))
        # The file format for the covariance has the first line as an integer
        # indicating the number of covariance elements, and the subsequent
        # lines being the elements.
        # This data file is just the systematic component of the covariance -
        # we also need to add in the statistical error on the magnitudes
        # that we loaded earlier.
        f = open(filename)
        line = f.readline()
        n = int(line)
        C = np.zeros((n, n))
        for i in range(n):
            for j in range(n):
                C[i, j] = float(f.readline())

        # Now add in the statistical error to the diagonal
        for i in range(n):
            C[i, i] += self.mu_obs_err[i]**2
        f.close()

        # Return the covariance; the parent class knows to invert this
        # later to get the precision matrix that we need for the likelihood.
        C = C[self.ww][:, self.ww]

        return C
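
    # Illustrative alternative, not used by the likelihood: the same covariance
    # file format can be read in a vectorized way with numpy. loadtxt flattens
    # the file into one array, the first value is the matrix dimension n, the
    # remaining n*n values fill the matrix in row-major order, and the
    # statistical errors are added on the diagonal.
    @staticmethod
    def _read_covmat_vectorized(filename, mu_obs_err):
        vals = np.loadtxt(filename)
        n = int(vals[0])
        C = vals[1:1 + n * n].reshape((n, n))
        C = C + np.diag(np.asarray(mu_obs_err)**2)
        return C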

    def extract_theory_points(self, block):
        """
        Run once per parameter set to extract the theory vector (the distance
        modulus at the data redshifts) that our data points are compared to.
        """
        import scipy.interpolate

        # Pull out theory D_A and z from the block.
        theory_x = block[self.x_section, self.x_name]
        theory_y = block[self.y_section, self.y_name]
        theory_ynew = self.zCMB * np.nan

        # Interpolation function of the theory so we can evaluate it at the
        # redshifts of the data
        f = scipy.interpolate.interp1d(theory_x, theory_y, kind=self.kind)

        zcmb = self.zCMB
        zhel = self.zHEL

        # distance modulus: D_L = (1 + z_CMB) * (1 + z_HEL) * D_A(z_CMB), in Mpc
        theory_ynew = 5.0 * np.log10((1.0 + zcmb) * (1.0 + zhel) * np.atleast_1d(f(zcmb))) + 25.

        # This offset M will be marginalized over in the modified log-likelihood
        # computation.
        M = block[names.supernova_params, "M"]
        return theory_ynew + M
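
    # Illustrative check, not used by the likelihood: in a flat LCDM toy model
    # with z_hel = z_cmb = z, the expression above reduces to the standard
    # mu = 5 log10(D_L / 10pc) with D_L = (1+z)^2 D_A. The cosmological numbers
    # here are assumptions for the demonstration only.
    @staticmethod
    def _mu_from_flat_lcdm(z, omega_m=0.3, h=0.7):
        from scipy.integrate import quad
        c = 299792.458   # speed of light in km/s
        H0 = 100.0 * h   # Hubble constant in km/s/Mpc
        inv_e = lambda zp: 1.0 / np.sqrt(omega_m * (1.0 + zp)**3 + (1.0 - omega_m))
        d_c = (c / H0) * quad(inv_e, 0.0, z)[0]   # comoving distance in Mpc
        d_a = d_c / (1.0 + z)                     # angular diameter distance in Mpc
        # same convention as extract_theory_points, with z_hel = z_cmb = z
        return 5.0 * np.log10((1.0 + z) * (1.0 + z) * d_a) + 25.0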

    def do_likelihood(self, block):
        # get the theory prediction at the data redshifts
        x = np.atleast_1d(self.extract_theory_points(block))
        mu = np.atleast_1d(self.data_y)

        # If the covariance is a function of parameters, compute the
        # new one now.
        if not self.constant_covariance:
            self.cov = np.atleast_2d(self.extract_covariance(block))
            self.inv_cov = np.atleast_2d(self.extract_inverse_covariance(block))

        # # plain Gaussian likelihood (replaced by the marginalized version below)
        # d = x - mu
        # chi2 = np.einsum('i,ij,j', d, self.inv_cov, d)
        # chi2 = float(chi2)
        # like = -0.5*chi2

        # SJ edit
        # modified log-likelihood computation that marginalizes over the offset M
        like = cov_log_likelihood(x, mu, self.inv_cov)
        chi2 = -2.0 * like

        # It can be useful to save the chi^2 as well as the likelihood,
        # especially when the covariance is non-constant.
        block[names.data_vector, self.like_name + "_CHI2"] = chi2
        block[names.data_vector, self.like_name + "_N"] = mu.size

        # If the covariance is a function of parameters then we must
        # account for this in the likelihood.
        if not self.constant_covariance:
            log_det = self.extract_covariance_log_determinant(block)
        else:
            log_det = self.log_det_constant

        norm = -0.5 * log_det
        like += norm
        block[names.data_vector, self.like_name + "_LOG_DET"] = float(log_det)
        block[names.data_vector, self.like_name + "_NORM"] = float(norm)

        # Numpy has started returning a 0D array in recent versions (1.14).
        # Convert this to a float.
        like = float(like)

        # Now save the resulting likelihood
        block[names.likelihoods, self.like_name + "_LIKE"] = like

        # For some very fast likelihoods the overhead from
        # the steps below is painful. Setting likelihood_only avoids that.
        if self.likelihood_only:
            return

        # Also save the predicted data points - the vector of observables
        # that in a Fisher approach we want the derivatives of -
        # and the inverse covariance matrix, which also goes into the Fisher matrix.
        block[names.data_vector, self.like_name + "_theory"] = x
        block[names.data_vector, self.like_name + "_data"] = mu
        block[names.data_vector, self.like_name + "_inverse_covariance"] = self.inv_cov

        # We might just be calculating the inverse cov and ignoring the covmat.
        # In that case we do not try to save it.
        if self.cov is not None:
            block[names.data_vector, self.like_name + "_covariance"] = self.cov
            # Also save a simulation of the data - the mean with added noise.
            # These can be used, among other places, by the ABC sampler.
            # This also requires the cov mat.
            # If we have a parameter-dependent covariance we need
            # to re-calculate the Cholesky decomposition to simulate some data.
            if not self.constant_covariance:
                self.chol = np.linalg.cholesky(self.cov)
            sim = self.simulate_data_vector(x)
            block[names.data_vector, self.like_name + "_simulation"] = sim


# This takes our class and turns it into the setup, execute, and cleanup
# functions that cosmosis expects from a module.
setup, execute, cleanup = DESY5SNLikelihood.build_module()
