Compare series + fix correlations (#620)

* new overlap function in tsbase.py * added CI test * debugged new correlation clause with pytest see test_correlation_t4() * CI test for multipleseries.correlation() with no overlap * updated docstrings * compare() method is functional; has docstring * typo fix * new overlap() method used in Series.correlation and MultipleSeries.correlation * unit tests for overlap() * CI tests bug fixes
LinkedEarth · Oct 18, 2024 · 84baeaa · 84baeaa
1 parent a384df9
commit 84baeaa
Show file tree

Hide file tree

Showing 8 changed files with 305 additions and 100 deletions.
diff --git a/pyleoclim/core/ensembleseries.py b/pyleoclim/core/ensembleseries.py
@@ -536,8 +536,8 @@ def correlation(self, target=None, timespan=None, alpha=0.05, method = 'ttest',
 
             The significance level (0.05 by default)
        
-        method : str, {'ttest','built-in','ar1sim','phaseran'}
-            method for significance testing. Default is 'ttest'
+        method : str, {'ttest','built-in','ar1sim','phaseran','CN'}
+            method for significance testing. Default is 'ttest' to lower computational cost, but this is not always the best choice
             
         statistic : str
             The name of the statistic used to measure the association, to be chosen from a subset of
@@ -576,12 +576,13 @@ def correlation(self, target=None, timespan=None, alpha=0.05, method = 'ttest',
 
         See also
         --------
-
-        pyleoclim.utils.correlation.corr_sig : Correlation function
-
+        pyleoclim.core.series.Series.correlation : pairwise correlations
+        
         pyleoclim.utils.correlation.fdr : False Discovery Rate
 
         pyleoclim.core.correns.CorrEns : The correlation ensemble object
+        
+        pyleoclim.core.surrogateseries.SurrogateSeries : surrogate series
 
         Examples
         --------
@@ -612,7 +613,7 @@ def correlation(self, target=None, timespan=None, alpha=0.05, method = 'ttest',
             print(corr_res)
             
         The `print` function tabulates the output, and conveys the p-value according
-        to the correlation test applied ("isospec", by default). To plot the result:
+        to the correlation test applied ("ttest", by default). To plot the result:
         
         .. jupyter-execute::
             
@@ -630,16 +631,22 @@ def correlation(self, target=None, timespan=None, alpha=0.05, method = 'ttest',
             if hasattr(target, 'series_list'):
                 nEns = np.size(target.series_list)
                 if idx < nEns:
-                    value2 = target.series_list[idx].value
-                    time2 = target.series_list[idx].time
+                    ts2 = target.series_list[idx].copy()
+                    #value2 = target.series_list[idx].value
+                    #time2 = target.series_list[idx].time
+
                 else:
-                    value2 = target.series_list[idx-nEns].value
-                    time2 = target.series_list[idx-nEns].time
+                    #value2 = target.series_list[idx-nEns].value
+                    #time2 = target.series_list[idx-nEns].time
+                    ts2 = target.series_list[idx-nEns].copy()
             else:
-                value2 = target.value
-                time2 = target.time
-
-            ts2 = Series(time=time2, value=value2, verbose=idx==0, auto_time_params=False)
+                ts2 = target.copy()
+                #value2 = target.value
+                #time2 = target.time
+
+            #ts2 = target.series_list[idx].copy()
+            #ts2.time = time2; ts2.value = value2 
+            #ts2 = Series(time=time2, value=value2, verbose=idx==0, auto_time_params=False) 
             corr_res = ts1.correlation(ts2, timespan=timespan, method=method,number=number,
                                        statistic=statistic,
                                        settings=settings, mute_pbar=True,

diff --git a/pyleoclim/core/multipleseries.py b/pyleoclim/core/multipleseries.py
@@ -17,7 +17,7 @@
 import numpy as np
 from copy import deepcopy
 
-from matplotlib.ticker import (MultipleLocator, AutoMinorLocator, FormatStrFormatter)
+from matplotlib.ticker import (AutoMinorLocator, FormatStrFormatter)
 import matplotlib.transforms as transforms
 import matplotlib as mpl
 import matplotlib.pyplot as plt
@@ -735,6 +735,8 @@ def correlation(self, target=None, timespan=None, alpha=0.05, statistic='pearson
                     fdr_kwargs=None, common_time_kwargs=None, mute_pbar=False, seed=None):
         ''' Calculate the correlation between a MultipleSeries and a target Series
 
+        This function recursively applies Series.correlation() to members of the MultipleSeries object.
+
         The significance of the correlation is assessed using one of the following methods:
 
         1. 'ttest': T-test adjusted for effective sample size, see [1]
@@ -748,8 +750,9 @@ def correlation(self, target=None, timespan=None, alpha=0.05, statistic='pearson
         The T-test is a parametric test, hence computationally cheap, but can only be performed in ideal circumstances.
         The others are non-parametric, but their computational requirements scale with the number of simulations.
 
-        The choise of significance test and associated number of Monte-Carlo simulations are passed through the `settings` parameter.
-
+        The False Discovery Rate method is applied to the assessment of significance when plotting the result. 
+        
+        If the computation fails, a diagnostic message returns the index and label of the incriminated series. 
         
         Parameters
         ----------
@@ -809,12 +812,23 @@ def correlation(self, target=None, timespan=None, alpha=0.05, statistic='pearson
         -------
         corr : CorrEns
         
-            the result object
+            the result object, containing the following:
+            - statistic r (array of real numbers)
+            - p-values pval (array of real numbers)
+            - signif (array of booleans)
+            - alpha (significance level)
+            
 
         See also
         --------
+        
+        pyleoclim.core.series.Series.correlation :  Series-level correlation 
 
-        pyleoclim.utils.correlation.corr_sig : Correlation function
+        pyleoclim.utils.correlation.association : SciPy measures of association between variables
+
+        pyleoclim.series.surrogates : parametric and non-parametric surrogates of any Series object
+
+        pyleoclim.multipleseries.common_time : Aligning time axes
 
         pyleoclim.utils.correlation.fdr : FDR function
         
@@ -825,7 +839,6 @@ def correlation(self, target=None, timespan=None, alpha=0.05, statistic='pearson
 
         .. jupyter-execute::
 
-            import pyleoclim as pyleo
             from pyleoclim.utils.tsmodel import colored_noise
             import numpy as np
 
@@ -866,15 +879,22 @@ def correlation(self, target=None, timespan=None, alpha=0.05, statistic='pearson
         if target is None:
             target = self.series_list[0]
 
-        print("Looping over "+ str(len(self.series_list)) +" Series in collection")
+        print(f"Looping over {len(self.series_list)} Series in collection")
         for idx, ts in tqdm(enumerate(self.series_list),  total=len(self.series_list), disable=mute_pbar):
-            corr_res = ts.correlation(target, timespan=timespan, alpha=alpha, settings=settings,
-                                      method=method, number=number, statistic=statistic,
-                                      common_time_kwargs=common_time_kwargs, seed=seed)
-            r_list.append(corr_res.r)
-            signif_list.append(corr_res.signif)
-            p_list.append(corr_res.p)
-
+            try:
+                corr_res = ts.correlation(target, timespan=timespan, alpha=alpha, settings=settings,
+                                          method=method, number=number, statistic=statistic,
+                                          common_time_kwargs=common_time_kwargs, seed=seed)
+                r_list.append(corr_res.r)
+                signif_list.append(corr_res.signif)
+                p_list.append(corr_res.p)
+            except:
+                ovrlp = ts.overlap(target)
+                print(f"Computation failed for series #{idx}, {ts.label}; overlap is {ovrlp} {ts.time_unit}")
+                r_list.append(np.nan)
+                signif_list.append(None)
+                p_list.append(np.nan)
+
         r_list = np.array(r_list)
         signif_fdr_list = []
         fdr_kwargs = {} if fdr_kwargs is None else fdr_kwargs.copy()