diff --git a/data b/data index e96857f3a90..288f4e48250 160000 --- a/data +++ b/data @@ -1 +1 @@ -Subproject commit e96857f3a906b2c4c642993ac036e617b8c49fc8 +Subproject commit 288f4e48250b9ab080180fbc1f83e5819e17498c diff --git a/doc/ipython-notebooks/multiclass/Tree/DecisionTrees.ipynb b/doc/ipython-notebooks/multiclass/Tree/DecisionTrees.ipynb index a32b6eff309..eb3b251d2f5 100644 --- a/doc/ipython-notebooks/multiclass/Tree/DecisionTrees.ipynb +++ b/doc/ipython-notebooks/multiclass/Tree/DecisionTrees.ipynb @@ -1405,9 +1405,9 @@ "source": [ "def train_chaidtree(dependent_var_type,feature_types,num_bins,feats,labels):\n", " # create CHAID tree object\n", - " c = sg.create_machine(\"CHAIDTree\", m_dependent_vartype=dependent_var_type,\n", - " m_feature_types=feature_types,\n", - " m_num_breakpoints=num_bins,\n", + " c = sg.create_machine(\"CHAIDTree\", dependent_vartype=dependent_var_type,\n", + " feature_types=feature_types,\n", + " num_breakpoints=num_bins,\n", " labels=labels)\n", " # train using training features\n", " c.train(feats)\n", @@ -1560,9 +1560,9 @@ "feature_types = np.array([2 for i in range(13)],dtype=np.int32) \n", "\n", "# setup CHAID tree - dependent variable is nominal(0), feature types set, number of bins(20)\n", - "chaid = sg.create_machine(\"CHAIDTree\", m_dependent_vartype=0,\n", - " m_feature_types=feature_types,\n", - " m_num_breakpoints=20)" + "chaid = sg.create_machine(\"CHAIDTree\", dependent_vartype=0,\n", + " feature_types=feature_types,\n", + " num_breakpoints=20)" ] }, { @@ -1644,10 +1644,10 @@ " feature_types[9]=1 \n", "\n", " # setup CHAID-tree\n", - " chaid = sg.create_machine(\"CHAIDTree\", m_dependent_vartype=2,\n", - " m_feature_types=feature_types,\n", - " m_num_breakpoints=10,\n", - " m_max_tree_depth=10)\n", + " chaid = sg.create_machine(\"CHAIDTree\", dependent_vartype=2,\n", + " feature_types=feature_types,\n", + " num_breakpoints=10,\n", + " max_tree_depth=10)\n", "\n", " # set evaluation criteria - mean squared error\n", " accuracy = sg.create_evaluation(\"MeanSquaredError\")\n", @@ -1727,4 +1727,4 @@ }, "nbformat": 4, "nbformat_minor": 1 -} +} \ No newline at end of file diff --git a/examples/meta/src/regression/cartree.sg.in b/examples/meta/src/regression/cartree.sg.in index 82ea8c028c7..8c9846b8f8e 100644 --- a/examples/meta/src/regression/cartree.sg.in +++ b/examples/meta/src/regression/cartree.sg.in @@ -14,8 +14,7 @@ ft[0] = False #![set_attribute_types] #![create_machine] -Machine cartree = create_machine("CARTree", nominal=ft, mode=enum EProblemType.PT_REGRESSION, folds=5, apply_cv_pruning=True, seed=1) -cartree.set_labels(labels_train) +Machine cartree = create_machine("CARTree", labels=labels_train, nominal=ft, mode=enum EProblemType.PT_REGRESSION, folds=5, apply_cv_pruning=True, seed=1) #![create_machine] #![train_and_apply] diff --git a/examples/meta/src/regression/chaid_tree.sg.in b/examples/meta/src/regression/chaid_tree.sg.in new file mode 100644 index 00000000000..90c173ecc90 --- /dev/null +++ b/examples/meta/src/regression/chaid_tree.sg.in @@ -0,0 +1,27 @@ +File f_feats_train = read_csv("@SHOGUN_DATA@/regression_1d_linear_features_train.dat") +File f_feats_test = read_csv("@SHOGUN_DATA@/regression_1d_linear_features_test.dat") +File f_labels_train = read_csv("@SHOGUN_DATA@/regression_1d_linear_labels_train.dat") + +#![create_features] +Features feats_train = create_features(f_feats_train) +Features feats_test = create_features(f_feats_test) +Labels labels_train = create_labels(f_labels_train) +#![create_features] + +#![set_feature_types] +IntVector ft(1) +ft[0] = 2 +#![set_feature_types] + +#![create_machine] +Machine chaidtree = create_machine("CHAIDTree", labels=labels_train, dependent_vartype=2, feature_types=ft, num_breakpoints=50) +#![create_machine] + +#![train_and_apply] +chaidtree.train(feats_train) +Labels labels_predict = chaidtree.apply(feats_test) +#![train_and_apply] + +#![extract_weights_labels] +RealVector labels_vector = labels_predict.get_real_vector("labels") +#![extract_weights_labels] diff --git a/examples/undocumented/python/regression_chaidtree.py b/examples/undocumented/python/regression_chaidtree.py deleted file mode 100644 index b2cb4d6a7b0..00000000000 --- a/examples/undocumented/python/regression_chaidtree.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python -from numpy import array, dtype, int32 - -# set input attribute as continuous i.e. 2 -feattypes = array([2],dtype=int32) - -parameter_list = [[500,50,15,0.2,feattypes]] - -def regression_chaidtree(num_train=500,num_test=50,x_range=15,noise_var=0.2,ft=feattypes): - try: - import shogun as sg - from shogun import RegressionLabels, CSVFile, CHAIDTree, PT_REGRESSION - from numpy import random - except ImportError: - print("Could not import Shogun and/or numpy modules") - return - - random.seed(1) - - # form training dataset : y=x with noise - X_train=random.rand(1,num_train)*x_range; - Y_train=X_train+random.randn(num_train)*noise_var - - # form test dataset - X_test=array([[float(i)/num_test*x_range for i in range(num_test)]]) - - # wrap features and labels into Shogun objects - feats_train=sg.create_features(X_train) - feats_test=sg.create_features(X_test) - train_labels=RegressionLabels(Y_train[0]) - - # CHAID Tree formation - c=CHAIDTree(2,feattypes,50) - c.set_labels(train_labels) - c.train(feats_train) - - # Regress on test data - output=c.apply_regression(feats_test).get_labels() - - return c,output - -if __name__=='__main__': - print('CHAIDTree') - regression_chaidtree(*parameter_list[0]) diff --git a/examples/undocumented/python/regression_randomforest.py b/examples/undocumented/python/regression_randomforest.py deleted file mode 100644 index 35e8626b43b..00000000000 --- a/examples/undocumented/python/regression_randomforest.py +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env python -from numpy import array, random - -traindat = '../data/fm_train_real.dat' -testdat = '../data/fm_test_real.dat' -label_traindat = '../data/label_train_multiclass.dat' - -# set input attribute as not nominal (ie. continuous) -feattypes = array([False]) - -parameter_list = [[500,50,15,0.2,feattypes]] - -def regression_randomforest(num_train=500,num_test=50,x_range=15,noise_var=0.2,ft=feattypes): - try: - from shogun import RegressionLabels, CSVFile - except ImportError: - print("Could not import Shogun modules") - return - import shogun as sg - - random.seed(1) - - # form training dataset : y=x with noise - X_train=random.rand(1,num_train)*x_range; - Y_train=X_train+random.randn(num_train)*noise_var - - # form test dataset - X_test=array([[float(i)/num_test*x_range for i in range(num_test)]]) - - # wrap features and labels into Shogun objects - feats_train=sg.create_features(X_train) - feats_test=sg.create_features(X_test) - train_labels=RegressionLabels(Y_train[0]) - - # Random Forest formation - rand_forest=sg.create_machine("RandomForest", features=feats_train, labels=train_labels, num_bags=20) - m = rand_forest.get("machine") - m.put("m_randsubset_size", 1) - m.put("nominal", ft) - rand_forest.put("combination_rule", sg.create_combination_rule("MeanRule")) - rand_forest.get_global_parallel().set_num_threads(1) - rand_forest.train() - - # Regress test data - output=rand_forest.apply_regression(feats_test).get("labels") - - return rand_forest,output - -if __name__=='__main__': - print('RandomForest') - regression_randomforest(*parameter_list[0]) diff --git a/src/interfaces/swig/shogun.i b/src/interfaces/swig/shogun.i index 5632c9a1858..8e02a787ea0 100644 --- a/src/interfaces/swig/shogun.i +++ b/src/interfaces/swig/shogun.i @@ -196,7 +196,7 @@ import org.jblas.*; } else if ($self->has(tag_float64)) $self->put(tag_float64, (float64_t)value); -#ifdef SWIGR +#if defined(SWIGR) || defined(SWIGOCTAVE) else if (Tag> tag_tvec(name); $self->has(tag_tvec)) { SGVector vec(1); diff --git a/src/shogun/multiclass/tree/CHAIDTree.cpp b/src/shogun/multiclass/tree/CHAIDTree.cpp index cafebd947ec..b3973647b6c 100644 --- a/src/shogun/multiclass/tree/CHAIDTree.cpp +++ b/src/shogun/multiclass/tree/CHAIDTree.cpp @@ -1392,14 +1392,14 @@ void CHAIDTree::init() m_cont_breakpoints=SGMatrix(); m_num_breakpoints=0; - SG_ADD(&m_weights,"m_weights", "weights", ParameterProperties::READONLY); - SG_ADD(&m_weights_set,"m_weights_set", "weights set", ParameterProperties::READONLY); - SG_ADD(&m_feature_types,"m_feature_types", "feature types", ParameterProperties::SETTING); - SG_ADD(&m_dependent_vartype,"m_dependent_vartype", "dependent variable type", ParameterProperties::SETTING); - SG_ADD(&m_max_tree_depth,"m_max_tree_depth", "max tree depth", ParameterProperties::HYPER); - SG_ADD(&m_min_node_size,"m_min_node_size", "min node size", ParameterProperties::SETTING); - SG_ADD(&m_alpha_merge,"m_alpha_merge", "alpha-merge", ParameterProperties::HYPER); - SG_ADD(&m_alpha_split,"m_alpha_split", "alpha-split", ParameterProperties::HYPER); - SG_ADD(&m_cont_breakpoints,"m_cont_breakpoints", "breakpoints in continuous attributes", ParameterProperties::SETTING); - SG_ADD(&m_num_breakpoints,"m_num_breakpoints", "number of breakpoints", ParameterProperties::HYPER); + SG_ADD(&m_weights,"weights", "weights", ParameterProperties::READONLY); + SG_ADD(&m_weights_set,"weights_set", "weights set", ParameterProperties::READONLY); + SG_ADD(&m_feature_types,"feature_types", "feature types", ParameterProperties::SETTING); + SG_ADD(&m_dependent_vartype,"dependent_vartype", "dependent variable type", ParameterProperties::SETTING); + SG_ADD(&m_max_tree_depth,"max_tree_depth", "max tree depth", ParameterProperties::HYPER); + SG_ADD(&m_min_node_size,"min_node_size", "min node size", ParameterProperties::SETTING); + SG_ADD(&m_alpha_merge,"alpha_merge", "alpha-merge", ParameterProperties::HYPER); + SG_ADD(&m_alpha_split,"alpha_split", "alpha-split", ParameterProperties::HYPER); + SG_ADD(&m_cont_breakpoints,"cont_breakpoints", "breakpoints in continuous attributes", ParameterProperties::SETTING); + SG_ADD(&m_num_breakpoints,"num_breakpoints", "number of breakpoints", ParameterProperties::HYPER); }