-
Notifications
You must be signed in to change notification settings - Fork 0
/
baseline_pipeline.py
executable file
·68 lines (51 loc) · 1.81 KB
/
baseline_pipeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
"""
Defines the baseline pipeline against which our results are compared.
"""
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn.neural_network import MLPRegressor
from dataset_info import get_dataset_info
import pandas as pd
class Dummy(BaseEstimator, TransformerMixin):
    """
    Pass-through transformer: learns nothing and returns its input as-is.

    It lets a pipeline branch hold a named step without altering the data
    that flows through it.
    """

    def __init__(self):
        """Takes no hyper-parameters; there is nothing to initialise."""

    def fit(self, X, y=None):
        """
        No-op fit.

        Parameters
        ----------
        X : any
            Input data; ignored.
        y : any, optional
            Target values; ignored.

        Returns
        -------
        The transformer itself, so that calls can be chained.
        """
        return self

    def transform(self, X, y=None):
        """
        Return the input untouched.

        Parameters
        ----------
        X : any
            Input data.
        y : any, optional
            Ignored.

        Returns
        -------
        The very same object that was passed in as ``X`` (no copy is made).
        """
        return X
def get_baseline_full_pipeline(df: pd.DataFrame, target: str) -> Pipeline:
    """
    Creates a baseline pipeline with machine learning algorithm attached to it.

    The pipeline is returned unfitted. It consists of a column-wise
    preprocessing step followed by an ``MLPRegressor``.

    Parameters
    ----------
    df : pd.DataFrame
        Dataset on which the preprocessing shall be performed, in the form of
        a pandas dataframe. It is only handed to ``get_dataset_info`` to
        determine the column groups; no fitting happens here.
    target : str
        Name of the target column (for which the machine learning algorithm
        should predict the value).

    Returns
    -------
    Pipeline with provided parameters and machine learning model. Its steps
    are named ``'transform'`` (the ``ColumnTransformer``) and ``'model'``
    (the ``MLPRegressor``).
    """
    # Presumably the names of the numerical and categorical feature columns;
    # the exact contract is defined by get_dataset_info - verify there.
    num_cols, cat_cols = get_dataset_info(df, target)

    # First column group: forwarded unchanged (Dummy.transform returns X).
    baseline_num_pipeline = Pipeline([
        ('dummy', Dummy()),
    ])

    # Second column group: one-hot encoded. handle_unknown='ignore' keeps
    # transform from raising on categories that were not seen during fit.
    baseline_cat_pipeline = Pipeline([
        ('ohe', OneHotEncoder(handle_unknown='ignore')),
    ])

    baseline_col_transform = ColumnTransformer([
        ('num', baseline_num_pipeline, num_cols),
        ('cat', baseline_cat_pipeline, cat_cols),
    ])

    baseline_full_pipeline = Pipeline(
        [
            ('transform', baseline_col_transform),
            # hidden_layer_sizes is written as a real one-element tuple:
            # "(50)" is just the integer 50 in parentheses, whereas the
            # parameter is documented as a sequence with one entry per
            # hidden layer. This requests a single hidden layer of 50 units.
            ('model', MLPRegressor(
                hidden_layer_sizes=(50,),
                batch_size=8,
                learning_rate_init=0.1,
                verbose=True,
                max_iter=30,
            )),
        ],
        verbose=True,
    )
    return baseline_full_pipeline