Skip to content

Commit

Permalink
Added JOSS paper
Browse files Browse the repository at this point in the history
  • Loading branch information
raphaelvallat committed Oct 5, 2018
1 parent 674060e commit 01e9fd8
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 0 deletions.
103 changes: 103 additions & 0 deletions JOSS/paper.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
% Repeated measures correlation (rmcorr); cited in paper.md as [@Bakdash2017].
@article{Bakdash2017,
  author   = {Bakdash, Jonathan Z and Marusich, Laura R},
  title    = {Repeated Measures Correlation},
  journal  = {Frontiers in Psychology},
  volume   = {8},
  pages    = {456},
  month    = apr,
  year     = {2017},
  doi      = {10.3389/fpsyg.2017.00456},
  abstract = {Repeated measures correlation (rmcorr) is a statistical technique
              for determining the common within-individual association for
              paired measures assessed on two or more occasions for multiple
              individuals. Simple regression/correlation is often applied to
              non-independent observations or aggregated data; this may produce
              biased, specious results due to violation of independence and/or
              differing patterns between-participants versus
              within-participants. Unlike simple regression/correlation, rmcorr
              does not violate the assumption of independence of observations.
              Also, rmcorr tends to have much greater statistical power because
              neither averaging nor aggregation is necessary for an
              intra-individual research question. Rmcorr estimates the common
              regression slope, the association shared among individuals. To
              make rmcorr accessible, we provide background information for its
              assumptions and equations, visualization, power, and tradeoffs
              with rmcorr compared to multilevel modeling. We introduce the R
              package (rmcorr) and demonstrate its use for inferential
              statistics and visualization with two example datasets. The
              examples are used to illustrate research questions at different
              levels of analysis, intra-individual, and inter-individual.
              Rmcorr is well-suited for research questions regarding the common
              linear association in paired repeated measures data. All results
              are fully reproducible.},
  keywords = {correlation; individual differences; intra-individual; multilevel
              modeling; repeated measures; statistical power},
  language = {en},
}


% Robust correlation toolbox paper; cited in paper.md as [@Pernet2012].
@article{Pernet2012,
  author   = {Pernet, Cyril R and Wilcox, Rand and Rousselet, Guillaume A},
  title    = {Robust Correlation Analyses: False Positive and Power Validation
              Using a New Open Source {Matlab} Toolbox},
  journal  = {Frontiers in Psychology},
  volume   = {3},
  pages    = {606},
  year     = {2012},
  doi      = {10.3389/fpsyg.2012.00606},
  abstract = {Pearson's correlation measures the strength of the association
              between two variables. The technique is, however, restricted to
              linear associations and is overly sensitive to outliers. Indeed,
              a single outlier can result in a highly inaccurate summary of
              the data. Yet, it remains the most commonly used measure of
              association in psychology research. Here we describe a free
              Matlab\textregistered{} based toolbox
              (http://sourceforge.net/projects/robustcorrtool/) that computes
              robust measures of association between two or more random
              variables: the percentage-bend correlation and
              skipped-correlations. After illustrating how to use the toolbox,
              we show that robust methods, where outliers are down weighted or
              removed and accounted for in significance testing, provide
              better estimates of the true association with accurate false
              positive control and without loss of power. The different
              correlation methods were tested with normal data and normal data
              contaminated with marginal or bivariate outliers. We report
              estimates of effect size, false positive rate and power, and
              advise on which technique to use depending on the data at hand.},
  keywords = {MATLAB; correlation; outliers; power; robust statistics},
  language = {en},
}


% CircStat circular statistics toolbox; cited in paper.md as [@Berens2009].
@article{Berens2009,
  author   = {Berens, Philipp},
  title    = {{CircStat}: A {MATLAB} Toolbox for Circular Statistics},
  journal  = {Journal of Statistical Software},
  volume   = {31},
  number   = {10},
  pages    = {1--21},
  year     = {2009},
  doi      = {10.18637/jss.v031.i10},
  abstract = {Directional data is ubiquitous in science. Due to its circular
              nature such data cannot be analyzed with commonly used
              statistical techniques. Despite the rapid development of
              specialized methods for directional statistics over the last
              fifty years, there is only little software available that makes
              such methods easy to use for practitioners. Most importantly, one
              of the most commonly used programming languages in biosciences,
              MATLAB, is currently not supporting directional statistics. To
              remedy this situation, we have implemented the CircStat toolbox
              for MATLAB which provides methods for the descriptive and
              inferential statistical analysis of directional data. We cover
              the statistical background of the available methods and describe
              how to apply them to data. Finally, we analyze a dataset from
              neurophysiology to demonstrate the capabilities of the CircStat
              toolbox.},
}

% pandas reference paper; cited in paper.md as [@Pandas] (key kept unchanged).
@inproceedings{Pandas,
  author    = {McKinney, Wes},
  title     = {Data Structures for Statistical Computing in {Python}},
  booktitle = {Proceedings of the 9th {Python} in Science Conference},
  editor    = {van der Walt, Stefan and Millman, Jarrod},
  pages     = {51--56},
  year      = {2010},
  doi       = {10.25080/Majora-92bf1922-00a},
}
29 changes: 29 additions & 0 deletions JOSS/paper.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
---
# JOSS paper metadata (YAML front matter).
title: 'Pingouin: statistics in Python'
tags:
  - statistics
  - python
  - data analysis
  - pandas
authors:
  # orcid and affiliation must be nested under the author item they describe.
  - name: Raphael Vallat
    orcid: 0000-0003-1779-7653
    affiliation: "1"
affiliations:
  # index is the value referenced by each author's affiliation field.
  - name: Department of Psychology, University of California, Berkeley.
    index: 1
date: 05 October 2018
bibliography: paper.bib
---

# Summary

Python is currently the fastest-growing programming language in the world, thanks to its ease of use, gentle learning curve and numerous high-quality packages for data science and machine learning. Surprisingly, however, Python is far behind the R programming language when it comes to general statistics, and for this reason many scientists still rely heavily on R to perform their statistical analyses.

In this paper, we present ``Pingouin``, an open-source Python package aimed at partially filling this gap by providing easy-to-use functions for computing some of the main statistical tests that scientists use on an everyday basis. This includes basic functions such as ANOVAs, ANCOVAs, post-hoc tests, non-parametric tests, and effect sizes, as well as more advanced functions such as Bayesian t-tests, repeated measures correlations [@Bakdash2017], robust correlations [@Pernet2012] and circular statistics [@Berens2009], to cite but a few. ``Pingouin`` is written in Python 3 and is mostly built on top of the Pandas [@Pandas] library, therefore allowing a fluid integration within a data analysis pipeline. ``Pingouin`` comes with extensive documentation and an API reference, as well as several Jupyter notebook examples.

# Citations

Citations to entries in paper.bib should be in [rMarkdown](http://rmarkdown.rstudio.com/authoring_bibliographies_and_citations.html) format.

# References

0 comments on commit 01e9fd8

Please sign in to comment.