Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Field-aware factorization machines #604

Open
wants to merge 26 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
c4a8b21
Remove nonccl build. Rewrite builds to CentOS. Unify x86_64 and ppc64…
mdymczyk Apr 7, 2018
23ee96b
Bring back 1 script and some convenient make targets
mdymczyk May 6, 2018
c0552da
Make full git clone on googletest
mdymczyk May 10, 2018
5e39049
Move tests to a common root folder. Move python req files to python f…
mdymczyk Apr 30, 2018
fa86dd7
Install custom arrow and pillow in runtime dockers for ppc64le. DRY d…
mdymczyk May 1, 2018
a8e92eb
Install git for runtime on ppc64le
mdymczyk May 1, 2018
1f46abb
Install git in runtime docker
mdymczyk May 1, 2018
ba7b3d3
Initial FFM GPU implementation.
mdymczyk May 2, 2018
46f737b
Initial Python bindings
mdymczyk May 11, 2018
8aec2ab
Initial ffm prediction impl
mdymczyk May 11, 2018
d443976
Fix gradient/weights computation. Pass parameters by ref to Python. T…
mdymczyk May 11, 2018
b1b0c6a
Fixes logloss calc. Fixes weight index calculation.
mdymczyk May 11, 2018
d27df65
Fixes label initialization - on the actual object not a copy.
mdymczyk May 13, 2018
0a7514a
Pass ffm model as reference so final weights get updated and passed b…
mdymczyk May 14, 2018
329d2de
Rewrite FFM data structures into pure pointers - much faster but stil…
mdymczyk May 15, 2018
90911f5
Faster wTx computation but still slow and not scalable
mdymczyk May 16, 2018
6ad0d25
FFM wTx using a faster kernel - still only on par with CPU
mdymczyk May 17, 2018
25fb40b
FFM now runs ~2x faster on large data:
mdymczyk May 21, 2018
a127759
Don't copy/allocate weights unnecessarily in the model
mdymczyk May 21, 2018
d2b97a3
Fix indexing issues in FFM
mdymczyk May 25, 2018
94265db
Initial validation dataset handling
mdymczyk May 31, 2018
3990da8
Validation dataset
mdymczyk Jun 1, 2018
f08a917
Validation data and early stopping
mdymczyk Jun 1, 2018
665237a
Support GPU computation of datasets larger than GPU memory
mdymczyk Jun 1, 2018
1732077
FFM CPU
mdymczyk Jun 9, 2018
b32728e
Fix tests and pylint
mdymczyk Jun 26, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ LIST(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
SET_DEFAULT_CONFIGURATION_RELEASE()

FIND_PACKAGE(OpenMP)
FIND_PACKAGE(BLAS REQUIRED)
FIND_PACKAGE(SWIG REQUIRED)
FIND_PACKAGE(PythonLibs REQUIRED) # SWIG
FIND_PACKAGE(BLAS REQUIRED)

INCLUDE(${SWIG_USE_FILE})

Expand All @@ -20,7 +20,8 @@ OPTION(DEV_BUILD "Dev build" OFF)
SET(CMAKE_CXX_STANDARD 11)
SET(CMAKE_CXX_STANDARD_REQUIRED ON)
SET(CMAKE_POSITION_INDEPENDENT_CODE ON)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -w")


# PythonLibs' PYTHON_INCLUDE_PATH doesn't take into account virtualenv etc.
# Open to suggestions how to do this better.
Expand Down Expand Up @@ -64,14 +65,16 @@ ADD_LIBRARY(commonh2o4gpu OBJECT ${COMMON_SOURCES})
FILE(GLOB_RECURSE CPU_SOURCES
src/cpu/*.cpp
src/cpu/*.h
src/base/*.cpp
src/base/*.h
)

ADD_LIBRARY(cpuh2o4gpu STATIC ${CPU_SOURCES} $<TARGET_OBJECTS:commonh2o4gpu>)
TARGET_LINK_LIBRARIES(cpuh2o4gpu ${BLAS_LIBRARIES})
#============= BUILD CPU LIBRARY

#============= SWIG
SET(CMAKE_SWIG_FLAGS -Werror)
SET(CMAKE_SWIG_FLAGS)
#============= SWIG

#============= CPU SWIG
Expand Down Expand Up @@ -116,13 +119,13 @@ if(USE_CUDA)

SET(GENCODE_FLAGS "")
FORMAT_GENCODE_FLAGS("${GPU_COMPUTE_VER}" GENCODE_FLAGS)
SET(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xcompiler -fPIC; -std=c++11;--expt-extended-lambda;--expt-relaxed-constexpr;${GENCODE_FLAGS};-lineinfo; -w;")
SET(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Xcompiler -fPIC -std=c++11 --expt-extended-lambda --expt-relaxed-constexpr ${GENCODE_FLAGS} -lineinfo -w")

FILE(GLOB_RECURSE GPU_SOURCES
src/*.cu
src/*.cuh
src/common/*.cpp
src/common/*.h
src/base/*.cpp
src/base/*.h
)

CUDA_ADD_LIBRARY(gpuh2o4gpu ${GPU_SOURCES} $<TARGET_OBJECTS:commonh2o4gpu> STATIC)
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile-build
Original file line number Diff line number Diff line change
Expand Up @@ -171,4 +171,4 @@ ENV GIT_AUTHOR_NAME="anonymous"
ENV GIT_AUTHOR_EMAIL="[email protected]"
ENV GIT_COMMITTER_NAME="anonymous"
ENV GIT_COMMITTER_EMAIL="[email protected]"
ENV EMAIL="[email protected]"
ENV EMAIL="[email protected]"
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ deps_install:
#########################################
# SUBMODULE BUILD TARGETS
#########################################

update_submodule:
echo ADD UPDATE SUBMODULE HERE
Expand Down Expand Up @@ -160,7 +162,6 @@ clean_deps:
#########################################
# FULL BUILD AND INSTALL TARGETS
#########################################

fullinstall: clean alldeps build install
mkdir -p src/interface_py/$(DIST_DIR)/$(PLATFORM)/ && mv src/interface_py/dist/*.whl src/interface_py/$(DIST_DIR)/$(PLATFORM)/

Expand All @@ -170,7 +171,6 @@ buildinstall: alldeps build install
#########################################
# DOCKER TARGETS
#########################################

DOCKER_CUDA_VERSION?=9.0

ifeq (${DOCKER_CUDA_VERSION},8.0)
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ Download the Docker file (for linux_x86_64):
* [CUDA9.2](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/0.2-cuda92/h2o4gpu-0.2.0.9999-cuda92-runtime.tar.bz2)

Load and run docker file (e.g. for bleeding-edge of cuda90):
```
pbzip2 -dc h2o4gpu-0.2.0.9999-cuda90-runtime.tar.bz2 | nvidia-docker load
mkdir -p log ; nvidia-docker run --name localhost --rm -p 8888:8888 -u `id -u`:`id -g` -v `pwd`/log:/log --entrypoint=./run.sh opsh2oai/h2o4gpu-0.2.0.9999-cuda90-runtime &
Expand Down
16 changes: 16 additions & 0 deletions ci/Jenkinsfile.template
Original file line number Diff line number Diff line change
@@ -1,3 +1,19 @@
// Just Notes:
//
//def jobnums = [0 , 1 , 2 , 3]
//def tags = ["nccl" , "nonccl" , "nccl" , "nonccl"]
//def cudatags = ["cuda8", "cuda8" , "cuda9" , "cuda9"]
//def dobuilds = [1, 0, 0, 0]
//def dofulltests = [1, 0, 0, 0]
//def dopytests = [1, 0, 0, 0]
//def doruntimes = [1, 1, 1, 1]
//def dockerimagesbuild = ["nvidia/cuda:8.0-cudnn5-devel-centos7", "nvidia/cuda:8.0-cudnn5-devel-centos7", "nvidia/cuda:9.0-cudnn7-devel-centos7", "nvidia/cuda:9.0-cudnn7-devel-centos7"]
//def dockerimagesruntime = ["nvidia/cuda:8.0-cudnn5-runtime-centos7", "nvidia/cuda:8.0-cudnn5-runtime-centos7", "nvidia/cuda:9.0-cudnn7-runtime-centos7", "nvidia/cuda:9.0-cudnn7-runtime-centos7"]
//def dists = ["dist1","dist2","dist3","dist4"]

def benchmark_commit_trigger

pipeline {
Expand Down
1 change: 1 addition & 0 deletions ci/Jenkinsfile.utils
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ void publishToS3(BuildInfo buildInfo, String extratag, String platform) {
if (isBleedingEdge()) {
bucket = "s3://h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/${artifactId}/${majorVersionTag}${extratag}/"

def arch = platform.split('-')[0]
def nonLocalVersionTag = versionTag.split('\\+')[0]
def bleedingEdgeArtifact = "${artifactId}-${nonLocalVersionTag}-cp36-cp36m-linux_${buildArch}.whl"

Expand Down
111 changes: 111 additions & 0 deletions src/base/ffm/batching.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/*!
* Copyright 2018 H2O.ai, Inc.
* License Apache License Version 2.0 (see LICENSE for details)
*/
#pragma once

#include "../../include/data/ffm/data.h"
#include "../../include/solver/ffm_api.h"
#include "../../common/logger.h"

namespace ffm {

/*!
 * Non-owning view over a contiguous slice of FFM data laid out in a
 * CSR-like fashion: flat feature/field/value node arrays plus per-row
 * labels, scales and row start offsets.
 *
 * All pointers alias memory owned elsewhere (typically a Dataset); the
 * batch never allocates or frees them.
 */
template<typename T>
class DatasetBatch {

 public:
  DatasetBatch() {}

  // NOTE(review): copy/move deliberately do NOT carry over `pos` -- a
  // copied batch restarts iteration at row 0 (matches original behavior).
  DatasetBatch(const DatasetBatch &other)
      : features(other.features), fields(other.fields), values(other.values),
        labels(other.labels), scales(other.scales),
        rowPositions(other.rowPositions), numRows(other.numRows) {}

  DatasetBatch(DatasetBatch &&other) noexcept
      : features(other.features), fields(other.fields), values(other.values),
        labels(other.labels), scales(other.scales),
        rowPositions(other.rowPositions), numRows(other.numRows) {}

  /**
   * @param features     feature index of every node in the batch
   * @param fields       field index of every node in the batch
   * @param values       value of every node in the batch
   * @param labels       label per row (may be null, e.g. for prediction)
   * @param scales       normalization scale per row
   * @param rowPositions starting node offset per row; size numRows + 1
   * @param numRows      number of rows covered by this batch
   */
  DatasetBatch(int *features, int *fields, T *values,
               int *labels, T *scales,
               int *rowPositions, int numRows)
      : features(features), fields(fields), values(values),
        labels(labels), scales(scales),
        rowPositions(rowPositions), numRows(numRows) {}

  // Virtual destructor: batches are deleted through DatasetBatch<T>*
  // pointers returned by DatasetBatcher::nextBatch(), and this class is
  // polymorphic (widestRow is virtual). Without it, deleting a derived
  // batch via a base pointer is undefined behavior.
  virtual ~DatasetBatch() {}

  // Starting position for each row. Of size numRows + 1.
  // All members are now value-initialized so a default-constructed
  // batch holds well-defined null/zero state instead of garbage.
  int *rowPositions = nullptr;

  // feature:field:value for all the data points in all the rows.
  int *features = nullptr;
  int *fields = nullptr;
  T *values = nullptr;

  // Label and scale for each row.
  int *labels = nullptr;
  T *scales = nullptr;

  // Current position (row index) in the batch.
  int pos = 0;

  // Actual number of samples (rows) in the batch.
  int numRows = 0;

  // Rows not yet consumed.
  int remaining() {
    return numRows - pos;
  }

  // True while there are unconsumed rows.
  bool hasNext() const {
    return pos < numRows;
  }

  // Widest row in the batch; base implementation knows nothing and
  // returns 0. Fixed: previously returned the double literal 0.0 from
  // an int function (silent narrowing conversion).
  virtual int widestRow() { return 0; }
};

/*!
 * Produces consecutive DatasetBatch views over a Dataset, advancing an
 * internal row cursor. The batcher does not own the Dataset's memory.
 */
template<typename T>
class DatasetBatcher {
 public:
  DatasetBatcher() {}

  DatasetBatcher(int numRows) : numRows(numRows) {}

  // NOTE(review): `params` is currently unused -- kept for interface
  // stability; confirm whether derived batchers need it.
  DatasetBatcher(Dataset<T> &dataset, Params const &params, int rows)
      : dataset(&dataset), numRows(rows) {}

  // Virtual destructor: nextBatch is virtual, so derived batchers may be
  // deleted through a DatasetBatcher<T>* -- without this that is UB.
  virtual ~DatasetBatcher() {}

  // True while there are rows not yet handed out in a batch.
  bool hasNext() const {
    return pos < numRows;
  }

  // Rows not yet handed out.
  int remaining() {
    return numRows - pos;
  }

  // Restart iteration from the first row.
  void reset() {
    pos = 0;
  }

  /**
   * Returns the next batch of at most batchSize rows; when batchSize is
   * non-positive or exceeds what is left, all remaining rows are used.
   * The caller takes ownership of the returned pointer and must delete it.
   */
  virtual DatasetBatch<T> *nextBatch(int batchSize) {
    int actualBatchSize =
        (batchSize > 0 && batchSize <= this->remaining()) ? batchSize : this->remaining();

    // Offset into the flat node arrays where row `pos` begins.
    int moveBy = this->dataset->rowPositions[this->pos];
    DatasetBatch<T> *batch = new DatasetBatch<T>(this->dataset->features + moveBy,
                                                 this->dataset->fields + moveBy,
                                                 this->dataset->values + moveBy,
                                                 this->dataset->labels + this->pos,
                                                 this->dataset->scales + this->pos,
                                                 this->dataset->rowPositions + this->pos,
                                                 actualBatchSize);
    this->pos += actualBatchSize;

    return batch;
  }

  // True when the batcher covers no rows at all.
  bool empty() {
    return numRows <= 0;
  }

 protected:
  // Non-owning; now initialized so default-constructed batchers do not
  // carry a garbage pointer.
  Dataset<T> *dataset = nullptr;
  int pos = 0;
  int numRows = 0;

};

}
Loading