Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Field-aware factorization machines #604

Open
wants to merge 26 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
c4a8b21
Remove nonccl build. Rewrite builds to CentOS. Unify x86_64 and ppc64…
mdymczyk Apr 7, 2018
23ee96b
Bring back 1 script and some convenient make targets
mdymczyk May 6, 2018
c0552da
Make full git clone on googletest
mdymczyk May 10, 2018
5e39049
Move tests to a common root folder. Move python req files to python f…
mdymczyk Apr 30, 2018
fa86dd7
Install custom arrow and pillow in runtime dockers for ppc64le. DRY d…
mdymczyk May 1, 2018
a8e92eb
Install git for runtime on ppc64le
mdymczyk May 1, 2018
1f46abb
Install git in runtime docker
mdymczyk May 1, 2018
ba7b3d3
Initial FFM GPU implementation.
mdymczyk May 2, 2018
46f737b
Initial Python bindings
mdymczyk May 11, 2018
8aec2ab
Initial ffm prediction impl
mdymczyk May 11, 2018
d443976
Fix gradient/weights computation. Pass parameters by ref to Python. T…
mdymczyk May 11, 2018
b1b0c6a
Fixes logloss calc. Fixes weight index calculation.
mdymczyk May 11, 2018
d27df65
Fixes label initialization - on the actual object not a copy.
mdymczyk May 13, 2018
0a7514a
Pass ffm model as reference so final weights get updated and passed b…
mdymczyk May 14, 2018
329d2de
Rewrite FFM data structures into pure pointers - much faster but stil…
mdymczyk May 15, 2018
90911f5
Faster wTx computation but still slow and not scalable
mdymczyk May 16, 2018
6ad0d25
FFM wTx using a faster kernel - still only on par with CPU
mdymczyk May 17, 2018
25fb40b
FFM now runs ~2x faster on large data:
mdymczyk May 21, 2018
a127759
Don't copy/allocate weights unnecessarily in the model
mdymczyk May 21, 2018
d2b97a3
Fix indexing issues in FFM
mdymczyk May 25, 2018
94265db
Initial validation dataset handling
mdymczyk May 31, 2018
3990da8
Validation dataset
mdymczyk Jun 1, 2018
f08a917
Validation data and early stopping
mdymczyk Jun 1, 2018
665237a
Support GPU computation of datasets larger than GPU memory
mdymczyk Jun 1, 2018
1732077
FFM CPU
mdymczyk Jun 9, 2018
b32728e
Fix tests and pylint
mdymczyk Jun 26, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ LIST(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
SET_DEFAULT_CONFIGURATION_RELEASE()

FIND_PACKAGE(OpenMP)
FIND_PACKAGE(BLAS REQUIRED)
FIND_PACKAGE(SWIG REQUIRED)
FIND_PACKAGE(PythonLibs REQUIRED) # SWIG
FIND_PACKAGE(BLAS REQUIRED)

INCLUDE(${SWIG_USE_FILE})

Expand All @@ -20,7 +20,8 @@ OPTION(DEV_BUILD "Dev build" OFF)
SET(CMAKE_CXX_STANDARD 11)
SET(CMAKE_CXX_STANDARD_REQUIRED ON)
SET(CMAKE_POSITION_INDEPENDENT_CODE ON)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -w")


# PythonLibs' PYTHON_INCLUDE_PATH doesn't take into account virtualenv etc.
# Open to suggestions how to do this better.
Expand Down Expand Up @@ -64,14 +65,16 @@ ADD_LIBRARY(commonh2o4gpu OBJECT ${COMMON_SOURCES})
FILE(GLOB_RECURSE CPU_SOURCES
src/cpu/*.cpp
src/cpu/*.h
src/base/*.cpp
src/base/*.h
)

ADD_LIBRARY(cpuh2o4gpu STATIC ${CPU_SOURCES} $<TARGET_OBJECTS:commonh2o4gpu>)
TARGET_LINK_LIBRARIES(cpuh2o4gpu ${BLAS_LIBRARIES})
#============= BUILD CPU LIBRARY

#============= SWIG
SET(CMAKE_SWIG_FLAGS -Werror)
SET(CMAKE_SWIG_FLAGS)
#============= SWIG

#============= CPU SWIG
Expand Down Expand Up @@ -116,13 +119,13 @@ if(USE_CUDA)

SET(GENCODE_FLAGS "")
FORMAT_GENCODE_FLAGS("${GPU_COMPUTE_VER}" GENCODE_FLAGS)
SET(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xcompiler -fPIC; -std=c++11;--expt-extended-lambda;--expt-relaxed-constexpr;${GENCODE_FLAGS};-lineinfo; -w;")
SET(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Xcompiler -fPIC -std=c++11 --expt-extended-lambda --expt-relaxed-constexpr ${GENCODE_FLAGS} -lineinfo -w")

FILE(GLOB_RECURSE GPU_SOURCES
src/*.cu
src/*.cuh
src/common/*.cpp
src/common/*.h
src/base/*.cpp
src/base/*.h
)

CUDA_ADD_LIBRARY(gpuh2o4gpu ${GPU_SOURCES} $<TARGET_OBJECTS:commonh2o4gpu> STATIC)
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile-build
Original file line number Diff line number Diff line change
Expand Up @@ -171,4 +171,4 @@ ENV GIT_AUTHOR_NAME="anonymous"
ENV GIT_AUTHOR_EMAIL="[email protected]"
ENV GIT_COMMITTER_NAME="anonymous"
ENV GIT_COMMITTER_EMAIL="[email protected]"
ENV EMAIL="[email protected]"
ENV EMAIL="[email protected]"
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ deps_install:
#########################################
# SUBMODULE BUILD TARGETS
#########################################

update_submodule:
echo ADD UPDATE SUBMODULE HERE
Expand Down Expand Up @@ -160,7 +162,6 @@ clean_deps:
#########################################
# FULL BUILD AND INSTALL TARGETS
#########################################

fullinstall: clean alldeps build install
mkdir -p src/interface_py/$(DIST_DIR)/$(PLATFORM)/ && mv src/interface_py/dist/*.whl src/interface_py/$(DIST_DIR)/$(PLATFORM)/

Expand All @@ -170,7 +171,6 @@ buildinstall: alldeps build install
#########################################
# DOCKER TARGETS
#########################################

DOCKER_CUDA_VERSION?=9.0

ifeq (${DOCKER_CUDA_VERSION},8.0)
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ Download the Docker file (for linux_x86_64):
* [CUDA9.2](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/0.2-cuda92/h2o4gpu-0.2.0.9999-cuda92-runtime.tar.bz2)

Load and run docker file (e.g. for bleeding-edge of cuda90):
```
pbzip2 -dc h2o4gpu-0.2.0.9999-cuda90-runtime.tar.bz2 | nvidia-docker load
mkdir -p log ; nvidia-docker run --name localhost --rm -p 8888:8888 -u `id -u`:`id -g` -v `pwd`/log:/log --entrypoint=./run.sh opsh2oai/h2o4gpu-0.2.0.9999-cuda90-runtime &
Expand Down
16 changes: 16 additions & 0 deletions ci/Jenkinsfile.template
Original file line number Diff line number Diff line change
@@ -1,3 +1,19 @@
// Just Notes:
//
//def jobnums = [0 , 1 , 2 , 3]
//def tags = ["nccl" , "nonccl" , "nccl" , "nonccl"]
//def cudatags = ["cuda8", "cuda8" , "cuda9" , "cuda9"]
//def dobuilds = [1, 0, 0, 0]
//def dofulltests = [1, 0, 0, 0]
//def dopytests = [1, 0, 0, 0]
//def doruntimes = [1, 1, 1, 1]
//def dockerimagesbuild = ["nvidia/cuda:8.0-cudnn5-devel-centos7", "nvidia/cuda:8.0-cudnn5-devel-centos7", "nvidia/cuda:9.0-cudnn7-devel-centos7", "nvidia/cuda:9.0-cudnn7-devel-centos7"]
//def dockerimagesruntime = ["nvidia/cuda:8.0-cudnn5-runtime-centos7", "nvidia/cuda:8.0-cudnn5-runtime-centos7", "nvidia/cuda:9.0-cudnn7-runtime-centos7", "nvidia/cuda:9.0-cudnn7-runtime-centos7"]
//def dists = ["dist1","dist2","dist3","dist4"]

def benchmark_commit_trigger

pipeline {
Expand Down
1 change: 1 addition & 0 deletions ci/Jenkinsfile.utils
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ void publishToS3(BuildInfo buildInfo, String extratag, String platform) {
if (isBleedingEdge()) {
bucket = "s3://h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/${artifactId}/${majorVersionTag}${extratag}/"

def arch = platform.split('-')[0]
def nonLocalVersionTag = versionTag.split('\\+')[0]
def bleedingEdgeArtifact = "${artifactId}-${nonLocalVersionTag}-cp36-cp36m-linux_${buildArch}.whl"

Expand Down
111 changes: 111 additions & 0 deletions src/base/ffm/batching.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/*!
* Copyright 2018 H2O.ai, Inc.
* License Apache License Version 2.0 (see LICENSE for details)
*/
#pragma once

#include "../../include/data/ffm/data.h"
#include "../../include/solver/ffm_api.h"
#include "../../common/logger.h"

namespace ffm {

/*!
 * Non-owning view over a contiguous slice of FFM data laid out in a
 * CSR-like fashion: flat feature/field/value node arrays plus per-row
 * labels, scales and row start offsets.
 *
 * All pointers alias memory owned elsewhere (typically a Dataset); the
 * batch never allocates or frees them.
 */
template<typename T>
class DatasetBatch {

 public:
  DatasetBatch() {}

  // NOTE(review): copy/move deliberately do NOT carry over `pos` -- a
  // copied batch restarts iteration at row 0 (matches original behavior).
  DatasetBatch(const DatasetBatch &other)
      : features(other.features), fields(other.fields), values(other.values),
        labels(other.labels), scales(other.scales),
        rowPositions(other.rowPositions), numRows(other.numRows) {}

  DatasetBatch(DatasetBatch &&other) noexcept
      : features(other.features), fields(other.fields), values(other.values),
        labels(other.labels), scales(other.scales),
        rowPositions(other.rowPositions), numRows(other.numRows) {}

  /**
   * @param features     feature index of every node in the batch
   * @param fields       field index of every node in the batch
   * @param values       value of every node in the batch
   * @param labels       label per row (may be null, e.g. for prediction)
   * @param scales       normalization scale per row
   * @param rowPositions starting node offset per row; size numRows + 1
   * @param numRows      number of rows covered by this batch
   */
  DatasetBatch(int *features, int *fields, T *values,
               int *labels, T *scales,
               int *rowPositions, int numRows)
      : features(features), fields(fields), values(values),
        labels(labels), scales(scales),
        rowPositions(rowPositions), numRows(numRows) {}

  // Virtual destructor: batches are deleted through DatasetBatch<T>*
  // pointers returned by DatasetBatcher::nextBatch(), and this class is
  // polymorphic (widestRow is virtual). Without it, deleting a derived
  // batch via a base pointer is undefined behavior.
  virtual ~DatasetBatch() {}

  // Starting position for each row. Of size numRows + 1.
  // All members are now value-initialized so a default-constructed
  // batch holds well-defined null/zero state instead of garbage.
  int *rowPositions = nullptr;

  // feature:field:value for all the data points in all the rows.
  int *features = nullptr;
  int *fields = nullptr;
  T *values = nullptr;

  // Label and scale for each row.
  int *labels = nullptr;
  T *scales = nullptr;

  // Current position (row index) in the batch.
  int pos = 0;

  // Actual number of samples (rows) in the batch.
  int numRows = 0;

  // Rows not yet consumed.
  int remaining() {
    return numRows - pos;
  }

  // True while there are unconsumed rows.
  bool hasNext() const {
    return pos < numRows;
  }

  // Widest row in the batch; base implementation knows nothing and
  // returns 0. Fixed: previously returned the double literal 0.0 from
  // an int function (silent narrowing conversion).
  virtual int widestRow() { return 0; }
};

/*!
 * Produces consecutive DatasetBatch views over a Dataset, advancing an
 * internal row cursor. The batcher does not own the Dataset's memory.
 */
template<typename T>
class DatasetBatcher {
 public:
  DatasetBatcher() {}

  DatasetBatcher(int numRows) : numRows(numRows) {}

  // NOTE(review): `params` is currently unused -- kept for interface
  // stability; confirm whether derived batchers need it.
  DatasetBatcher(Dataset<T> &dataset, Params const &params, int rows)
      : dataset(&dataset), numRows(rows) {}

  // Virtual destructor: nextBatch is virtual, so derived batchers may be
  // deleted through a DatasetBatcher<T>* -- without this that is UB.
  virtual ~DatasetBatcher() {}

  // True while there are rows not yet handed out in a batch.
  bool hasNext() const {
    return pos < numRows;
  }

  // Rows not yet handed out.
  int remaining() {
    return numRows - pos;
  }

  // Restart iteration from the first row.
  void reset() {
    pos = 0;
  }

  /**
   * Returns the next batch of at most batchSize rows; when batchSize is
   * non-positive or exceeds what is left, all remaining rows are used.
   * The caller takes ownership of the returned pointer and must delete it.
   */
  virtual DatasetBatch<T> *nextBatch(int batchSize) {
    int actualBatchSize =
        (batchSize > 0 && batchSize <= this->remaining()) ? batchSize : this->remaining();

    // Offset into the flat node arrays where row `pos` begins.
    int moveBy = this->dataset->rowPositions[this->pos];
    DatasetBatch<T> *batch = new DatasetBatch<T>(this->dataset->features + moveBy,
                                                 this->dataset->fields + moveBy,
                                                 this->dataset->values + moveBy,
                                                 this->dataset->labels + this->pos,
                                                 this->dataset->scales + this->pos,
                                                 this->dataset->rowPositions + this->pos,
                                                 actualBatchSize);
    this->pos += actualBatchSize;

    return batch;
  }

  // True when the batcher covers no rows at all.
  bool empty() {
    return numRows <= 0;
  }

 protected:
  // Non-owning; now initialized so default-constructed batchers do not
  // carry a garbage pointer.
  Dataset<T> *dataset = nullptr;
  int pos = 0;
  int numRows = 0;

};

}
Loading