-
Notifications
You must be signed in to change notification settings - Fork 1
/
tests.py
92 lines (70 loc) · 3.1 KB
/
tests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import pytest
import numpy as np
import pandas as pd
import os.path as op
from sklearn.metrics import roc_auc_score, recall_score
from noiseceiling import compute_nc_classification, compute_nc_regression
from noiseceiling import reduce_repeats
from noiseceiling.bootstrap import run_bootstraps_nc
def _load_data(classification=True):
    """Load the example feature matrix and the matching rating targets.

    Both files are read from the relative ``noiseceiling/data`` directory,
    so the working directory must contain that path.

    Parameters
    ----------
    classification : bool
        If True, keep the rows whose ``rating_type`` is ``'emotion'`` and
        drop those rated ``'Geen van allen'``. If False, keep the rows whose
        ``rating_type`` is ``'arousal'``.

    Returns
    -------
    X : pandas.DataFrame
        Rows of the AU feature table selected by the index of ``y``.
    y : pandas.Series
        The ``rating`` column of the selected rows.
    """
    data_dir = op.join('noiseceiling', 'data')

    # Ratings first, features second (both use their first column as index).
    ratings = pd.read_csv(
        op.join(data_dir, 'sub-xx_task-expressive_ratings.csv'), index_col=0
    )
    features = pd.read_csv(
        op.join(data_dir, 'featurespace-AU.tsv'), sep='\t', index_col=0
    )

    if classification:
        ratings = ratings.query("rating_type == 'emotion'")
        ratings = ratings.query("rating != 'Geen van allen'")
    else:
        ratings = ratings.query("rating_type == 'arousal'")

    y = ratings['rating']
    # Align the features with the retained ratings through the shared index.
    X = features.loc[y.index, :]
    return X, y
@pytest.mark.parametrize("classification", [False, True])
def test_no_repeats(classification):
    """Expect a ValueError from the noise-ceiling functions on random data.

    The features are drawn from a continuous normal distribution, so the
    100 rows are presumably all distinct (i.e. there are no repeats).
    """
    n_samples, n_features = 100, 5
    X = pd.DataFrame(np.random.normal(0, 1, size=(n_samples, n_features)))

    # Pick the target type and the function under test for this case.
    if classification:
        targets = np.random.choice(['a', 'b', 'c'], size=n_samples)
        compute_nc = compute_nc_classification
    else:
        targets = np.random.normal(0, 1, n_samples)
        compute_nc = compute_nc_regression

    y = pd.Series(targets)
    with pytest.raises(ValueError):
        compute_nc(X, y)
@pytest.mark.parametrize("classification", [False, True])
def test_bootstrap_nc(classification):
    """Smoke-test ``run_bootstraps_nc`` on the example data.

    Runs once with the regression targets and once with the classification
    targets. Nothing is asserted about the result: the test passes as long
    as the call completes without raising.
    """
    X, y = _load_data(classification=classification)
    run_bootstraps_nc(X, y, classification=classification)
@pytest.mark.parametrize("use_index", [False, True])
@pytest.mark.parametrize("use_repeats_only", [False, True])
@pytest.mark.parametrize("per_class", [False, True])
def test_nc_classification(use_index, use_repeats_only, per_class):
    """Smoke-test ``compute_nc_classification`` over all flag combinations.

    Every combination of ``use_index``, ``use_repeats_only`` and
    ``per_class`` is run with ``soft=True`` and ``roc_auc_score`` as the
    scoring function; the test only checks that the call does not raise.
    """
    features, labels = _load_data(classification=True)
    options = dict(
        use_repeats_only=use_repeats_only,
        soft=True,
        per_class=per_class,
        use_index=use_index,
        score_func=roc_auc_score,
    )
    compute_nc_classification(features, labels, **options)
@pytest.mark.parametrize("y_type", ["integer", "string"])
@pytest.mark.parametrize("per_class", [False, True])
def test_nc_classification_ytype(y_type, per_class):
    """Smoke-test ``compute_nc_classification`` with string and integer labels.

    For ``y_type == 'integer'`` the string labels are replaced by integer
    codes before the call; for ``'string'`` they are passed unchanged. The
    test only checks that the call does not raise.
    """
    X, y = _load_data(classification=True)
    if y_type == 'integer':
        # Convert strings to integers: one-hot encode the labels and take the
        # column position of the hot entry as the code, keeping y's index.
        y = pd.Series(pd.get_dummies(y).to_numpy().argmax(axis=1), index=y.index)

    # The return value is not inspected, so it is not bound to a name.
    compute_nc_classification(
        X, y, use_repeats_only=False, soft=True, per_class=per_class,
        use_index=False, score_func=roc_auc_score
    )
@pytest.mark.parametrize("use_index", [False, True])
@pytest.mark.parametrize("use_repeats_only", [False, True])
def test_nc_regression(use_index, use_repeats_only):
    """Smoke-test ``compute_nc_regression`` over all flag combinations.

    Every combination of ``use_index`` and ``use_repeats_only`` is run on
    the regression targets; the test only checks that the call does not
    raise.
    """
    X, y = _load_data(classification=False)
    compute_nc_regression(
        X, y, use_repeats_only=use_repeats_only,
        # Forward the parametrized flag itself. Passing ``use_repeats_only``
        # here would leave the mixed combinations untested and duplicate
        # the other two.
        use_index=use_index
    )
@pytest.mark.parametrize("use_index", [False, True])
@pytest.mark.parametrize("classification", [False, True])
def test_reduce_repeats(use_index, classification):
    """Smoke-test ``reduce_repeats`` on both target types.

    ``categorical`` follows the ``classification`` flag used to load the
    data; the test only checks that the call does not raise.
    """
    features, targets = _load_data(classification=classification)
    reduce_repeats(
        features,
        targets,
        categorical=classification,
        use_index=use_index,
    )