Skip to content

Commit

Permalink
Better coverage mixed_anova
Browse files Browse the repository at this point in the history
  • Loading branch information
raphaelvallat committed Jul 21, 2019
1 parent 564667f commit 48a218d
Show file tree
Hide file tree
Showing 4 changed files with 183 additions and 108 deletions.
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ include pingouin/datasets/datasets.csv
include pingouin/datasets/icc.csv
include pingouin/datasets/mediation.csv
include pingouin/datasets/mixed_anova.csv
include pingouin/datasets/mixed_anova_unbalanced.csv
include pingouin/datasets/multivariate.csv
include pingouin/datasets/pairwise_corr.csv
include pingouin/datasets/pairwise_ttests.csv
Expand Down
1 change: 1 addition & 0 deletions pingouin/datasets/datasets.csv
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ cronbach_wide_missing,Questionnaire rating (binary) in wide format and with miss
icc,Wine quality rating by 4 judges,intraclass_corr,www.real-statistics.com
mediation,Mediation analysis,linear_regression - mediation,https://data.library.virginia.edu/introduction-to-mediation-analysis/
mixed_anova,Memory scores in two groups at three time points,mixed_anova,Pingouin
mixed_anova_unbalanced,Memory scores in three groups at four time points,mixed_anova,Pingouin
multivariate,Multivariate health outcomes in drug and placebo conditions,multivariate statistics,www.real-statistics.com
pairwise_corr,Big 5 personality traits,corr - pairwise_corr,Dolan et al 2009
pairwise_ttests,Scores at 3 time points per gender,pairwise_ttests,Pingouin
Expand Down
105 changes: 105 additions & 0 deletions pingouin/datasets/mixed_anova_unbalanced.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
Scores,Time,Group,Subject
5.971435163732493,T0,Control,0
4.309024305293535,T0,Control,1
6.387162940307738,T0,Control,5
6.359588413717416,T0,Control,6
4.863476495582651,T0,Control,7
5.515696372114429,T0,Control,8
3.2573150458145945,T0,Control,9
6.650035724719818,T1,Control,0
6.4919460223426775,T1,Control,1
5.502118364683486,T1,Control,5
5.905453411570191,T1,Control,6
5.789091940980035,T1,Control,7
6.821158192129386,T1,Control,8
3.95309444677076,T1,Control,9
5.297353675370818,T2,Control,0
4.844030655861066,T2,Control,1
5.0306947152941,T2,Control,5
6.175554085122381,T2,Control,6
3.6829727734098032,T2,Control,7
5.316891459821001,T2,Control,8
6.558969187571151,T2,Control,9
5.102159771800008,T3,Control,0
5.837437653613972,T3,Control,1
5.3779084251523255,T3,Control,5
5.624712953768216,T3,Control,6
5.177205194391704,T3,Control,7
6.3416747129961415,T3,Control,8
7.890960515463033,T3,Control,9
5.776199587837237,T0,Therapy,10
5.133554069535044,T0,Therapy,11
5.7361419366840725,T0,Therapy,12
3.625022399309971,T0,Therapy,13
5.947792199748547,T0,Therapy,14
4.802843215560301,T0,Therapy,15
5.563205166738653,T0,Therapy,16
5.7182891913492195,T0,Therapy,17
6.455413982398135,T0,Therapy,18
6.5410087949313915,T1,Therapy,10
4.254189922955694,T1,Therapy,11
4.298026718499156,T1,Therapy,12
5.599081800051087,T1,Therapy,13
5.151757550813145,T1,Therapy,14
5.555380491630616,T1,Therapy,15
6.054020332199238,T1,Therapy,16
5.66448697472186,T1,Therapy,17
6.2657383060625955,T1,Therapy,18
4.725763666232685,T2,Therapy,10
5.6296551228958975,T2,Therapy,11
6.007968855216035,T2,Therapy,12
5.491501236894121,T2,Therapy,13
6.733800732555499,T2,Therapy,14
3.2995463661877045,T2,Therapy,15
7.730603620838799,T2,Therapy,16
4.5573687109772365,T2,Therapy,17
5.911883386777701,T2,Therapy,18
4.914564788236803,T3,Therapy,10
6.162059737162049,T3,Therapy,11
6.404228225462175,T3,Therapy,12
6.223507967893809,T3,Therapy,13
4.773745686469774,T3,Therapy,14
7.707842950778001,T3,Therapy,15
5.926962541870895,T3,Therapy,16
4.547340890749048,T3,Therapy,17
6.33197944580913,T3,Therapy,18
5.864392325050896,T0,Meditation,20
1.836483339375265,T0,Meditation,21
6.721105615470206,T0,Meditation,22
5.552630552204535,T0,Meditation,23
5.564529542932399,T0,Meditation,24
4.969904309123513,T0,Meditation,25
6.167368735752412,T0,Meditation,26
6.384919841909897,T0,Meditation,27
5.670835848826805,T0,Meditation,28
6.791986193446408,T0,Meditation,29
5.879842313008629,T1,Meditation,20
5.400035419303477,T1,Meditation,21
4.772149441318094,T1,Meditation,22
5.215281788739212,T1,Meditation,23
6.616593926547842,T1,Meditation,24
5.718052948173335,T1,Meditation,25
5.455233985745355,T1,Meditation,26
6.328288145297394,T1,Meditation,27
4.7310112165198674,T1,Meditation,28
5.288118690873185,T1,Meditation,29
6.591205359743063,T2,Meditation,20
6.866533696353573,T2,Meditation,21
6.803591759111203,T2,Meditation,22
6.5852956847818565,T2,Meditation,23
6.78428811274975,T2,Meditation,24
7.663481512426145,T2,Meditation,25
5.518894716374608,T2,Meditation,26
5.8319823336625145,T2,Meditation,27
7.524574355126174,T2,Meditation,28
5.0188917248559575,T2,Meditation,29
7.275475504274325,T3,Meditation,20
4.689284675970471,T3,Meditation,21
5.949234896863726,T3,Meditation,22
7.149163805919065,T3,Meditation,23
6.196067133898749,T3,Meditation,24
6.217824588334266,T3,Meditation,25
7.080656004381456,T3,Meditation,26
4.581501009608386,T3,Meditation,27
6.447071635325711,T3,Meditation,28
6.794844209327205,T3,Meditation,29
184 changes: 76 additions & 108 deletions pingouin/tests/test_parametric.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest
import numpy as np
from numpy.testing import assert_array_equal as array_equal
from unittest import TestCase
from pingouin.parametric import (ttest, anova, rm_anova, mixed_anova,
ancova, welch_anova)
Expand Down Expand Up @@ -104,53 +105,34 @@ def test_anova(self):
df_aov2 = read_dataset('anova2')
aov2 = anova(dv="Yield", between=["Blend", "Crop"],
data=df_aov2).round(3)
assert aov2.loc[0, 'MS'] == 2.042
assert aov2.loc[1, 'MS'] == 1368.292
assert aov2.loc[2, 'MS'] == 1180.042
assert aov2.loc[3, 'MS'] == 541.847
assert aov2.loc[0, 'F'] == 0.004
assert aov2.loc[1, 'F'] == 2.525
assert aov2.loc[2, 'F'] == 2.178
assert aov2.loc[0, 'p-unc'] == 0.952
assert aov2.loc[1, 'p-unc'] == 0.108
assert aov2.loc[2, 'p-unc'] == 0.142
assert aov2.loc[0, 'np2'] == 0.000
assert aov2.loc[1, 'np2'] == 0.219
assert aov2.loc[2, 'np2'] == 0.195
array_equal(aov2.loc[:, 'MS'].values, [2.042, 1368.292, 1180.042,
541.847])
array_equal(aov2.loc[[0, 1, 2], 'F'].values, [0.004, 2.525, 2.178])
array_equal(aov2.loc[[0, 1, 2], 'p-unc'].values, [0.952, 0.108, 0.142])
array_equal(aov2.loc[[0, 1, 2], 'np2'].values, [0.000, 0.219, 0.195])

# Export
anova(dv="Yield", between=["Blend", "Crop"],
export_filename='test_export.csv', data=df_aov2)
# Two-way ANOVA with unbalanced design
df_aov2 = read_dataset('anova2_unbalanced')
aov2 = df_aov2.anova(dv="Scores", export_filename='test_export.csv',
between=["Diet", "Exercise"]).round(3)
assert aov2.loc[0, 'MS'] == 390.625
assert aov2.loc[1, 'MS'] == 180.625
assert aov2.loc[2, 'MS'] == 15.625
assert aov2.loc[3, 'MS'] == 52.625
assert aov2.loc[0, 'F'] == 7.423
assert aov2.loc[1, 'F'] == 3.432
assert aov2.loc[2, 'F'] == 0.297
assert aov2.loc[0, 'p-unc'] == 0.034
assert aov2.loc[1, 'p-unc'] == 0.113
assert aov2.loc[2, 'p-unc'] == 0.605
assert aov2.loc[0, 'np2'] == 0.553
assert aov2.loc[1, 'np2'] == 0.364
assert aov2.loc[2, 'np2'] == 0.047
array_equal(aov2.loc[:, 'MS'].values, [390.625, 180.625, 15.625,
52.625])
array_equal(aov2.loc[[0, 1, 2], 'F'].values, [7.423, 3.432, 0.297])
array_equal(aov2.loc[[0, 1, 2], 'p-unc'].values, [0.034, 0.113, 0.605])
array_equal(aov2.loc[[0, 1, 2], 'np2'].values, [0.553, 0.364, 0.047])

# Two-way ANOVA with unbalanced design and missing values
df_aov2.loc[9, 'Scores'] = np.nan
# Type 2
aov2 = anova(dv="Scores", between=["Diet", "Exercise"],
data=df_aov2).round(3)
assert aov2.loc[0, 'F'] == 10.403
assert aov2.loc[1, 'F'] == 5.167
assert aov2.loc[2, 'F'] == 0.761
assert aov2.loc[0, 'p-unc'] == 0.023
assert aov2.loc[1, 'p-unc'] == 0.072
assert aov2.loc[2, 'p-unc'] == 0.423
assert aov2.loc[0, 'np2'] == 0.675
assert aov2.loc[1, 'np2'] == 0.508
assert aov2.loc[2, 'np2'] == 0.132
array_equal(aov2.loc[[0, 1, 2], 'F'].values, [10.403, 5.167, 0.761])
array_equal(aov2.loc[[0, 1, 2], 'p-unc'].values, [0.023, 0.072, 0.423])
array_equal(aov2.loc[[0, 1, 2], 'np2'].values, [0.675, 0.508, 0.132])
# Type 1
aov2_ss1 = anova(dv="Scores", between=["Diet", "Exercise"],
ss_type=1, data=df_aov2).round(3)
assert not aov2.equals(aov2_ss1)
Expand All @@ -169,15 +151,12 @@ def test_anova(self):
assert aov3_ss1.equals(aov3_ss2)
assert aov3_ss2.equals(aov3_ss3)
# Compare with JASP
np.testing.assert_array_equal(aov3_ss1.loc[:, 'F'],
[2.462, 13.449, 0.484, 0.139, 1.522,
1.446, 1.094, np.nan])
np.testing.assert_array_equal(aov3_ss1.loc[:, 'np2'],
[0.049, 0.219, 0.020, 0.003, 0.060,
0.057, 0.044, np.nan])
np.testing.assert_array_equal(aov3_ss1.loc[:, 'p-unc'],
[0.123, 0.001, 0.619, 0.711, 0.229,
0.245, 0.343, np.nan])
array_equal(aov3_ss1.loc[:, 'F'], [2.462, 13.449, 0.484, 0.139, 1.522,
1.446, 1.094, np.nan])
array_equal(aov3_ss1.loc[:, 'np2'], [0.049, 0.219, 0.020, 0.003, 0.060,
0.057, 0.044, np.nan])
array_equal(aov3_ss1.loc[:, 'p-unc'], [0.123, 0.001, 0.619, 0.711,
0.229, 0.245, 0.343, np.nan])
# Unbalanced
df_aov3 = read_dataset('anova3_unbalanced')
aov3_ss1 = anova(dv="Cholesterol", between=['Sex', 'Risk', 'Drug'],
Expand All @@ -188,40 +167,31 @@ def test_anova(self):
ss_type=3, data=df_aov3).round(3)
# Compare with JASP
# Type 1
np.testing.assert_array_equal(aov3_ss1.loc[:, 'F'],
[4.155, 15.166, 0.422, 0.085, 0.859,
1.170, 0.505, np.nan])
np.testing.assert_array_equal(aov3_ss1.loc[:, 'np2'],
[0.068, 0.210, 0.015, 0.001, 0.029,
0.039, 0.017, np.nan])
np.testing.assert_array_equal(aov3_ss1.loc[:, 'p-unc'],
[0.046, 0., 0.658, 0.772, 0.429,
0.318, 0.606, np.nan])
np.testing.assert_array_equal(aov3_ss1.loc[:, 'Source'],
['Sex', 'Risk', 'Drug', 'Sex * Risk',
'Sex * Drug', 'Risk * Drug',
'Sex * Risk * Drug', 'Residual'])
array_equal(aov3_ss1.loc[:, 'F'], [4.155, 15.166, 0.422, 0.085, 0.859,
1.170, 0.505, np.nan])
array_equal(aov3_ss1.loc[:, 'np2'], [0.068, 0.210, 0.015, 0.001, 0.029,
0.039, 0.017, np.nan])
array_equal(aov3_ss1.loc[:, 'p-unc'], [0.046, 0., 0.658, 0.772, 0.429,
0.318, 0.606, np.nan])
array_equal(aov3_ss1.loc[:, 'Source'],
['Sex', 'Risk', 'Drug', 'Sex * Risk',
'Sex * Drug', 'Risk * Drug', 'Sex * Risk * Drug',
'Residual'])
# Type 2
np.testing.assert_array_equal(aov3_ss2.loc[:, 'F'],
[3.759, 15.169, 0.429, 0.099, 0.739,
1.170, 0.505, np.nan])
np.testing.assert_array_equal(aov3_ss2.loc[:, 'np2'],
[0.062, 0.210, 0.015, 0.002, 0.025,
0.039, 0.017, np.nan])
np.testing.assert_array_equal(aov3_ss2.loc[:, 'p-unc'],
[0.057, 0., 0.653, 0.754, 0.482,
0.318, 0.606, np.nan])
array_equal(aov3_ss2.loc[:, 'F'], [3.759, 15.169, 0.429, 0.099, 0.739,
1.170, 0.505, np.nan])
array_equal(aov3_ss2.loc[:, 'np2'], [0.062, 0.210, 0.015, 0.002, 0.025,
0.039, 0.017, np.nan])
array_equal(aov3_ss2.loc[:, 'p-unc'], [0.057, 0., 0.653, 0.754, 0.482,
0.318, 0.606, np.nan])

# Type 3
np.testing.assert_array_equal(aov3_ss3.loc[:, 'F'],
[3.910, 15.555, 0.484, 0.079, 0.750,
1.060, 0.505, np.nan])
np.testing.assert_array_equal(aov3_ss3.loc[:, 'np2'],
[0.064, 0.214, 0.017, 0.001, 0.026,
0.036, 0.017, np.nan])
np.testing.assert_array_equal(aov3_ss3.loc[:, 'p-unc'],
[0.053, 0., 0.619, 0.779, 0.477,
0.353, 0.606, np.nan])
array_equal(aov3_ss3.loc[:, 'F'], [3.910, 15.555, 0.484, 0.079, 0.750,
1.060, 0.505, np.nan])
array_equal(aov3_ss3.loc[:, 'np2'], [0.064, 0.214, 0.017, 0.001, 0.026,
0.036, 0.017, np.nan])
array_equal(aov3_ss3.loc[:, 'p-unc'], [0.053, 0., 0.619, 0.779, 0.477,
0.353, 0.606, np.nan])

# Error: invalid char in column names
df_aov3['Sex:'] = np.random.normal(size=df_aov3.shape[0])
Expand Down Expand Up @@ -276,15 +246,9 @@ def test_rm_anova2(self):
aov = rm_anova(data=data, subject='Subject', within=['Time', 'Metric'],
dv='Performance',
export_filename='test_export.csv').round(3)
assert aov.loc[0, "MS"] == 828.817
assert aov.loc[1, "MS"] == 682.617
assert aov.loc[2, "MS"] == 112.217
assert aov.loc[0, "F"] == 33.852
assert aov.loc[1, "F"] == 26.959
assert aov.loc[2, "F"] == 12.632
assert aov.loc[0, "np2"] == 0.790
assert aov.loc[1, "np2"] == 0.750
assert aov.loc[2, "np2"] == 0.584
array_equal(aov.loc[:, 'MS'].values, [828.817, 682.617, 112.217])
array_equal(aov.loc[:, 'F'].values, [33.852, 26.959, 12.632])
array_equal(aov.loc[:, 'np2'].values, [0.790, 0.750, 0.584])
assert aov.loc[0, "eps"] == 1.000
assert aov.loc[1, "eps"] == 0.969
assert aov.loc[2, "eps"] >= 0.500 # 0.5 is the lower bound
Expand All @@ -301,51 +265,55 @@ def test_rm_anova2(self):
def test_mixed_anova(self):
"""Test function mixed_anova.
Compare with JASP and ezANOVA."""
# Balanced design, two groups, one within factor with three time points
aov = mixed_anova(dv='Scores', within='Time', subject='Subject',
between='Group', data=df, correction=True).round(3)
# Compare with ezANOVA / JASP
assert aov.loc[0, 'SS'] == 5.460
assert aov.loc[1, 'SS'] == 7.628
assert aov.loc[2, 'SS'] == 5.168
assert aov.loc[0, 'F'] == 5.052
assert aov.loc[1, 'F'] == 4.027
assert aov.loc[2, 'F'] == 2.728
assert aov.loc[0, 'np2'] == 0.080
assert aov.loc[1, 'np2'] == 0.065
assert aov.loc[2, 'np2'] == 0.045
array_equal(aov.loc[:, 'SS'].values, [5.460, 7.628, 5.168])
array_equal(aov.loc[:, 'DF1'].values, [1, 2, 2])
array_equal(aov.loc[:, 'DF2'].values, [58, 116, 116])
array_equal(aov.loc[:, 'F'].values, [5.052, 4.027, 2.728])
array_equal(aov.loc[:, 'np2'].values, [0.080, 0.065, 0.045])
assert aov.loc[1, 'eps'] == 0.999
assert aov.loc[1, 'W-spher'] == 0.999
assert round(aov.loc[1, 'p-GG-corr'], 2) == 0.02

# With missing values
df_nan2 = df_nan.copy()
df_nan2.iloc[158, 0] = np.nan
aov = mixed_anova(dv='Scores', within='Time', subject='Subject',
between='Group', data=df_nan2, correction=True,
export_filename='test_export.csv').round(3)
# Compare with ezANOVA / JASP
assert aov.loc[0, 'F'] == 5.692
assert aov.loc[1, 'F'] == 3.053
assert aov.loc[2, 'F'] == 3.501
assert aov.loc[0, 'np2'] == 0.094
assert aov.loc[1, 'np2'] == 0.053
assert aov.loc[2, 'np2'] == 0.060
array_equal(aov.loc[:, 'F'].values, [5.692, 3.053, 3.501])
array_equal(aov.loc[:, 'np2'].values, [0.094, 0.053, 0.060])
assert aov.loc[1, 'eps'] == 0.997
assert aov.loc[1, 'W-spher'] == 0.996

# Unbalanced group
df_unbalanced = df[df["Subject"] <= 54]
aov = mixed_anova(data=df_unbalanced, dv='Scores',
subject='Subject', within='Time', between='Group',
correction=True).round(3)
# Compare with ezANOVA / JASP
assert aov.loc[0, 'F'] == 3.561
assert aov.loc[1, 'F'] == 2.421
assert aov.loc[2, 'F'] == 1.827
assert aov.loc[0, 'np2'] == 0.063
assert aov.loc[1, 'np2'] == 0.044
assert aov.loc[2, 'np2'] == 0.033
array_equal(aov.loc[:, 'F'].values, [3.561, 2.421, 1.827])
array_equal(aov.loc[:, 'np2'].values, [0.063, 0.044, 0.033])
assert aov.loc[1, 'eps'] == 1. # JASP = 0.998
assert aov.loc[1, 'W-spher'] == 1. # JASP = 0.998

# With three groups and four time points, unbalanced
df_unbalanced = read_dataset('mixed_anova_unbalanced.csv')
aov = mixed_anova(data=df_unbalanced, dv='Scores', subject='Subject',
correction=True, within='Time',
between='Group').round(3)
array_equal(aov.loc[:, 'DF1'].values, [2, 3, 6])
array_equal(aov.loc[:, 'DF2'].values, [23, 69, 69])
array_equal(aov.loc[:, 'F'].values, [2.302, 1.707, 0.888])
array_equal(aov.loc[:, 'p-unc'].values,
np.round([1.225913e-01, 1.736066e-01, 5.087900e-01], 3))
# Check correction: values are very slightly different than ezANOVA
assert np.isclose(aov.at[1, 'eps'], 0.925374, atol=0.001)
assert np.isclose(aov.at[1, 'p-GG-corr'], 0.1779205, atol=0.01)
assert np.isclose(aov.at[1, 'W-spher'], 0.8850318, atol=0.01)
assert np.isclose(aov.at[1, 'p-spher'], 0.7535238, atol=0.1)

def test_ancova(self):
"""Test function ancova.
Compare with JASP."""
Expand Down

0 comments on commit 48a218d

Please sign in to comment.