Skip to content

Commit

Permalink
Compare series + fix correlations (#620)
Browse files Browse the repository at this point in the history
* new overlap function in tsbase.py

* added CI test

* debugged new correlation clause with pytest

see test_correlation_t4()

* CI test for multipleseries.correlation() with no overlap

* updated docstrings

* compare() method is functional; has docstring

* typo fix

* new overlap() method

used in Series.correlation and MultipleSeries.correlation

* unit tests for overlap()

* CI tests bug fixes
  • Loading branch information
CommonClimate authored Oct 18, 2024
1 parent a384df9 commit 84baeaa
Show file tree
Hide file tree
Showing 8 changed files with 305 additions and 100 deletions.
35 changes: 21 additions & 14 deletions pyleoclim/core/ensembleseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,8 +536,8 @@ def correlation(self, target=None, timespan=None, alpha=0.05, method = 'ttest',
The significance level (0.05 by default)
method : str, {'ttest','built-in','ar1sim','phaseran'}
method for significance testing. Default is 'ttest'
method : str, {'ttest','built-in','ar1sim','phaseran','CN'}
method for significance testing. Default is 'ttest' to lower computational cost, but this is not always the best choice
statistic : str
The name of the statistic used to measure the association, to be chosen from a subset of
Expand Down Expand Up @@ -576,12 +576,13 @@ def correlation(self, target=None, timespan=None, alpha=0.05, method = 'ttest',
See also
--------
pyleoclim.utils.correlation.corr_sig : Correlation function
pyleoclim.core.series.Series.correlation : pairwise correlations
pyleoclim.utils.correlation.fdr : False Discovery Rate
pyleoclim.core.correns.CorrEns : The correlation ensemble object
pyleoclim.core.surrogateseries.SurrogateSeries : surrogate series
Examples
--------
Expand Down Expand Up @@ -612,7 +613,7 @@ def correlation(self, target=None, timespan=None, alpha=0.05, method = 'ttest',
print(corr_res)
The `print` function tabulates the output, and conveys the p-value according
to the correlation test applied ("isospec", by default). To plot the result:
to the correlation test applied ("ttest", by default). To plot the result:
.. jupyter-execute::
Expand All @@ -630,16 +631,22 @@ def correlation(self, target=None, timespan=None, alpha=0.05, method = 'ttest',
if hasattr(target, 'series_list'):
nEns = np.size(target.series_list)
if idx < nEns:
value2 = target.series_list[idx].value
time2 = target.series_list[idx].time
ts2 = target.series_list[idx].copy()
#value2 = target.series_list[idx].value
#time2 = target.series_list[idx].time

else:
value2 = target.series_list[idx-nEns].value
time2 = target.series_list[idx-nEns].time
#value2 = target.series_list[idx-nEns].value
#time2 = target.series_list[idx-nEns].time
ts2 = target.series_list[idx-nEns].copy()
else:
value2 = target.value
time2 = target.time

ts2 = Series(time=time2, value=value2, verbose=idx==0, auto_time_params=False)
ts2 = target.copy()
#value2 = target.value
#time2 = target.time

#ts2 = target.series_list[idx].copy()
#ts2.time = time2; ts2.value = value2
#ts2 = Series(time=time2, value=value2, verbose=idx==0, auto_time_params=False)
corr_res = ts1.correlation(ts2, timespan=timespan, method=method,number=number,
statistic=statistic,
settings=settings, mute_pbar=True,
Expand Down
48 changes: 34 additions & 14 deletions pyleoclim/core/multipleseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import numpy as np
from copy import deepcopy

from matplotlib.ticker import (MultipleLocator, AutoMinorLocator, FormatStrFormatter)
from matplotlib.ticker import (AutoMinorLocator, FormatStrFormatter)
import matplotlib.transforms as transforms
import matplotlib as mpl
import matplotlib.pyplot as plt
Expand Down Expand Up @@ -735,6 +735,8 @@ def correlation(self, target=None, timespan=None, alpha=0.05, statistic='pearson
fdr_kwargs=None, common_time_kwargs=None, mute_pbar=False, seed=None):
''' Calculate the correlation between a MultipleSeries and a target Series
This function recursively applies Series.correlation() to members of the MultipleSeries object.
The significance of the correlation is assessed using one of the following methods:
1. 'ttest': T-test adjusted for effective sample size, see [1]
Expand All @@ -748,8 +750,9 @@ def correlation(self, target=None, timespan=None, alpha=0.05, statistic='pearson
The T-test is a parametric test, hence computationally cheap, but can only be performed in ideal circumstances.
The others are non-parametric, but their computational requirements scale with the number of simulations.
The choise of significance test and associated number of Monte-Carlo simulations are passed through the `settings` parameter.
The False Discovery Rate method is applied to the assessment of significance when plotting the result.
If the computation fails, a diagnostic message returns the index and label of the incriminated series.
Parameters
----------
Expand Down Expand Up @@ -809,12 +812,23 @@ def correlation(self, target=None, timespan=None, alpha=0.05, statistic='pearson
-------
corr : CorrEns
the result object
the result object, containing the following:
- statistic r (array of real numbers)
- p-values pval (array of real numbers)
- signif (array of booleans)
- alpha (significance level)
See also
--------
pyleoclim.core.series.Series.correlation : Series-level correlation
pyleoclim.utils.correlation.corr_sig : Correlation function
pyleoclim.utils.correlation.association : SciPy measures of association between variables
pyleoclim.series.surrogates : parametric and non-parametric surrogates of any Series object
pyleoclim.multipleseries.common_time : Aligning time axes
pyleoclim.utils.correlation.fdr : FDR function
Expand All @@ -825,7 +839,6 @@ def correlation(self, target=None, timespan=None, alpha=0.05, statistic='pearson
.. jupyter-execute::
import pyleoclim as pyleo
from pyleoclim.utils.tsmodel import colored_noise
import numpy as np
Expand Down Expand Up @@ -866,15 +879,22 @@ def correlation(self, target=None, timespan=None, alpha=0.05, statistic='pearson
if target is None:
target = self.series_list[0]

print("Looping over "+ str(len(self.series_list)) +" Series in collection")
print(f"Looping over {len(self.series_list)} Series in collection")
for idx, ts in tqdm(enumerate(self.series_list), total=len(self.series_list), disable=mute_pbar):
corr_res = ts.correlation(target, timespan=timespan, alpha=alpha, settings=settings,
method=method, number=number, statistic=statistic,
common_time_kwargs=common_time_kwargs, seed=seed)
r_list.append(corr_res.r)
signif_list.append(corr_res.signif)
p_list.append(corr_res.p)

try:
corr_res = ts.correlation(target, timespan=timespan, alpha=alpha, settings=settings,
method=method, number=number, statistic=statistic,
common_time_kwargs=common_time_kwargs, seed=seed)
r_list.append(corr_res.r)
signif_list.append(corr_res.signif)
p_list.append(corr_res.p)
except:
ovrlp = ts.overlap(target)
print(f"Computation failed for series #{idx}, {ts.label}; overlap is {ovrlp} {ts.time_unit}")
r_list.append(np.nan)
signif_list.append(None)
p_list.append(np.nan)

r_list = np.array(r_list)
signif_fdr_list = []
fdr_kwargs = {} if fdr_kwargs is None else fdr_kwargs.copy()
Expand Down
Loading

0 comments on commit 84baeaa

Please sign in to comment.