This repository has been archived by the owner on Jun 22, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 21
/
neptune_random_search.yaml
86 lines (75 loc) · 2.07 KB
/
neptune_random_search.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# Neptune experiment configuration for a LightGBM random-search run
# (lgbm_random_search_runs below sets the number of runs).
# Replace ORGANIZATION and every YOUR/PATH/... placeholder before use.
project: ORGANIZATION/Santander-Value-Prediction-Challenge

name: Santander-Value-Prediction-Challenge
tags: [solution-3]

# Channel used to score the experiment; lower values are better.
metric:
  channel: 'RMSLE'
  goal: minimize

# Paths listed for exclusion.
# NOTE(review): presumably skipped when the source snapshot is uploaded --
# confirm against the Neptune CLI version in use.
exclude:
  - output
  - imgs
  - neptune.log
  - offline_job.log
  - .git
  - .github
  - .idea
  - .ipynb_checkpoints
  - Untitled.ipynb

# Everything below must stay nested under `parameters`; at column 0 these
# keys would become top-level entries and `parameters` itself would be null.
parameters:
  # Data
  train_filepath: YOUR/PATH/TO/train.csv
  test_filepath: YOUR/PATH/TO/test.csv
  sample_submission_filepath: YOUR/PATH/TO/sample_submission.csv
  experiment_directory: YOUR/PATH/WORKDIR

  # Kaggle
  # NOTE(review): 0/1 values in this file look like integer on/off flags --
  # verify against the code that reads them.
  kaggle_api: 0
  kaggle_message: 'solution-3'

  # Data preparation
  n_cv_splits: 5
  validation_size: 0.1
  shuffle: 1

  # Execution
  clean_experiment_directory_before_training: 1
  num_workers: 4
  verbose: 1

  # Data Cleaning
  drop_zero_fraction__threshold: 0.98
  variance_threshold__threshold: 0.0

  # Feature Extraction
  # Quoted on purpose: the value is passed as a string, not as a YAML list.
  row_aggregations__bucket_nrs: '[1, 2]'
  truncated_svd__use: False
  truncated_svd__n_components: 50
  truncated_svd__n_iter: 10
  pca__use: False
  pca__n_components: 100
  fast_ica__use: False
  fast_ica__n_components: 15
  factor_analysis__use: True
  factor_analysis__n_components: 50
  gaussian_random_projection__use: False
  gaussian_random_projection__n_components: 50
  # NOTE(review): this key lacks the `random_` infix used by its two siblings
  # above; left unchanged because the consuming code is not visible here.
  gaussian_projection__eps: 0.1
  sparse_random_projection__use: True
  sparse_random_projection__n_components: 50

  # Light GBM
  # Quoted bracketed values are search specifications kept as strings.
  # NOTE(review): presumably '[low, high]' is a sampling range and a trailing
  # "list" / "uniform" marker selects the sampling mode -- confirm against the
  # random-search code. Plain scalars are fixed for every run.
  lgbm_random_search_runs: 500
  lgbm__device: cpu  # options: gpu, cpu
  lgbm__boosting_type: gbdt
  lgbm__objective: rmse
  lgbm__metric: rmse
  lgbm__number_boosting_rounds: 10000
  lgbm__early_stopping_rounds: 100
  lgbm__learning_rate: 0.01
  lgbm__num_leaves: '[10, 50]'
  lgbm__max_depth: '[1, 20]'
  lgbm__min_child_samples: '[1, 20]'
  lgbm__max_bin: '[180, 500]'  # at most 255 for device=gpu
  lgbm__subsample: '[0.8, 0.9, 0.99, 0.6, 0.7, "list"]'
  lgbm__subsample_freq: 1
  lgbm__colsample_bytree: 0.8
  lgbm__min_child_weight: '[1, 20]'
  lgbm__reg_lambda: '[0.0, 0.1, "uniform"]'
  lgbm__reg_alpha: '[0.0, 0.1, "uniform"]'
  lgbm__scale_pos_weight: 1
  lgbm__zero_as_missing: True

  # Postprocessing
  aggregation_method: mean