-
-
Notifications
You must be signed in to change notification settings - Fork 142
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
674060e
commit 01e9fd8
Showing
2 changed files
with
132 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
% Reference for the repeated measures correlation (rmcorr) method.
@article{Bakdash2017,
  author   = {Bakdash, Jonathan Z. and Marusich, Laura R.},
  title    = {Repeated Measures Correlation},
  journal  = {Frontiers in Psychology},
  volume   = {8},
  pages    = {456},
  month    = apr,
  year     = {2017},
  doi      = {10.3389/fpsyg.2017.00456},
  abstract = {Repeated measures correlation (rmcorr) is a statistical technique
              for determining the common within-individual association for
              paired measures assessed on two or more occasions for multiple
              individuals. Simple regression/correlation is often applied to
              non-independent observations or aggregated data; this may produce
              biased, specious results due to violation of independence and/or
              differing patterns between-participants versus
              within-participants. Unlike simple regression/correlation, rmcorr
              does not violate the assumption of independence of observations.
              Also, rmcorr tends to have much greater statistical power because
              neither averaging nor aggregation is necessary for an
              intra-individual research question. Rmcorr estimates the common
              regression slope, the association shared among individuals. To
              make rmcorr accessible, we provide background information for its
              assumptions and equations, visualization, power, and tradeoffs
              with rmcorr compared to multilevel modeling. We introduce the R
              package (rmcorr) and demonstrate its use for inferential
              statistics and visualization with two example datasets. The
              examples are used to illustrate research questions at different
              levels of analysis, intra-individual, and inter-individual.
              Rmcorr is well-suited for research questions regarding the common
              linear association in paired repeated measures data. All results
              are fully reproducible.},
  keywords = {correlation; individual differences; intra-individual; multilevel
              modeling; repeated measures; statistical power},
  language = {en}
}
|
||
|
||
% Reference for the robust correlation methods (percentage-bend and skipped correlations).
@article{Pernet2012,
  author   = {Pernet, Cyril R. and Wilcox, Rand and Rousselet, Guillaume A.},
  title    = {Robust Correlation Analyses: False Positive and Power Validation
              Using a New Open Source {Matlab} Toolbox},
  journal  = {Frontiers in Psychology},
  volume   = {3},
  pages    = {606},
  year     = {2012},
  doi      = {10.3389/fpsyg.2012.00606},
  abstract = {Pearson's correlation measures the strength of the association
              between two variables. The technique is, however, restricted to
              linear associations and is overly sensitive to outliers. Indeed,
              a single outlier can result in a highly inaccurate summary of
              the data. Yet, it remains the most commonly used measure of
              association in psychology research. Here we describe a free
              Matlab{\textregistered} based toolbox
              (http://sourceforge.net/projects/robustcorrtool/) that computes
              robust measures of association between two or more random
              variables: the percentage-bend correlation and
              skipped-correlations. After illustrating how to use the toolbox,
              we show that robust methods, where outliers are down weighted or
              removed and accounted for in significance testing, provide
              better estimates of the true association with accurate false
              positive control and without loss of power. The different
              correlation methods were tested with normal data and normal data
              contaminated with marginal or bivariate outliers. We report
              estimates of effect size, false positive rate and power, and
              advise on which technique to use depending on the data at hand.},
  keywords = {MATLAB; correlation; outliers; power; robust statistics},
  language = {en}
}
|
||
|
||
% Reference for the circular statistics methods (CircStat toolbox).
@article{Berens2009,
  author   = {Berens, Philipp},
  title    = {{CircStat}: A {MATLAB} Toolbox for Circular Statistics},
  journal  = {Journal of Statistical Software},
  volume   = {31},
  number   = {10},
  pages    = {1--21},
  year     = {2009},
  doi      = {10.18637/jss.v031.i10},
  abstract = {Directional data is ubiquitious in science. Due to its circular
              nature such data cannot be analyzed with commonly used
              statistical techniques. Despite the rapid development of
              specialized methods for directional statistics over the last
              fifty years, there is only little software available that makes
              such methods easy to use for practioners. Most importantly, one
              of the most commonly used programming languages in biosciences,
              MATLAB, is currently not supporting directional statistics. To
              remedy this situation, we have implemented the CircStat toolbox
              for MATLAB which provides methods for the descriptive and
              inferential statistical analysis of directional data. We cover
              the statistical background of the available methods and describe
              how to apply them to data. Finally, we analyze a dataset from
              neurophysiology to demonstrate the capabilities of the CircStat
              toolbox.}
}
|
||
% Reference for the pandas library; the key is kept as "Pandas" so existing [@Pandas] citations resolve.
@inproceedings{Pandas,
  author    = {McKinney, Wes},
  title     = {Data Structures for Statistical Computing in {Python}},
  booktitle = {Proceedings of the 9th {Python} in Science Conference},
  editor    = {van der Walt, Stefan and Millman, Jarrod},
  pages     = {51--56},
  year      = {2010}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
--- | ||
title: 'Pingouin: statistics in Python' | ||
tags: | ||
- statistics | ||
- python | ||
- data analysis | ||
- pandas | ||
authors: | ||
- name: Raphael Vallat | ||
orcid: 0000-0003-1779-7653 | ||
affiliation: "1" | ||
affiliations: | ||
- name: Department of Psychology, University of California, Berkeley. | ||
index: 1 | ||
date: 05 October 2018 | ||
bibliography: paper.bib | ||
--- | ||
|
||
# Summary | ||
|
||
Python is currently the fastest growing programming language in the world, thanks to its ease of use, gentle learning curve, and numerous high-quality packages for data science and machine learning. Surprisingly, however, Python lags far behind the R programming language when it comes to general statistics, and for this reason many scientists still rely heavily on R to perform their statistical analyses.
|
||
In this paper, we present ``Pingouin``, an open-source Python package aimed at partially filling this gap by providing easy-to-use functions for computing some of the main statistical tests that scientists use on a daily basis. This includes basic functions such as ANOVAs, ANCOVAs, post-hoc tests, non-parametric tests, and effect sizes, as well as more advanced functions such as Bayesian t-tests, repeated measures correlations [@Bakdash2017], robust correlations [@Pernet2012], and circular statistics [@Berens2009], to name but a few. ``Pingouin`` is written in Python 3 and is mostly built on top of the Pandas [@Pandas] library, therefore allowing seamless integration within a data analysis pipeline. ``Pingouin`` comes with extensive documentation and an API reference, as well as several Jupyter notebook examples.
|
||
# Citations | ||
|
||
Citations to entries in paper.bib should be in [rMarkdown](http://rmarkdown.rstudio.com/authoring_bibliographies_and_citations.html) format. | ||
|
||
# References |