Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add oneDNN #937

Closed
wants to merge 23 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
87b8d53
Add oneDNN submodule to 3rd_party
graemenail May 5, 2022
e72a0dd
Add oneDNN to CMake
graemenail May 5, 2022
d341447
Don't build DNNL examples
graemenail May 5, 2022
72e0b1a
Allow static builds of DNNL
graemenail May 5, 2022
641a816
Remove MKL include from config parser
graemenail May 17, 2022
1e8555f
Remove MKL from microsoft/quicksand
graemenail May 17, 2022
2b44b3f
Remove MKL omatcopy from FBGEMM packed_gemm
graemenail May 17, 2022
bdc02f6
Remove MKL from GitHub Actions
graemenail May 17, 2022
49140fc
Remove MKL call from 3in4 transpose, reverting to plain copy (on forw…
graemenail May 17, 2022
9da225e
Replace MKL sgemm with oneDNN sgemm
graemenail May 17, 2022
348fa95
Remove MKL from CMake and improve oneDNN CMake
graemenail May 17, 2022
6e60006
Remove MKL guards in prod. This also removes sgemm_batched
graemenail May 17, 2022
5879c20
Use int in loop for ProdBatched
graemenail May 17, 2022
586d20c
oneDNN only use OMP runtime when specified
graemenail May 18, 2022
3a5d3f3
Move MSVC unicode flags out of global flags
graemenail May 30, 2022
bfe7146
Disable DNNL JIT Profiling
graemenail May 19, 2022
e1bba93
Cache Boost
graemenail May 30, 2022
201321b
Clean up after debug build
graemenail Jun 3, 2022
58a1a44
Remove FindMKL.cmake
graemenail Jun 3, 2022
2b88d54
Update Changelog
graemenail Jun 3, 2022
4e3572d
Remove MKL from release workflow
graemenail Jun 3, 2022
8852700
Replace MKL reference in docs
graemenail Jun 3, 2022
ce859c4
Fix comments mentioning MKL
graemenail Jun 3, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 0 additions & 16 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,6 @@ jobs:
- name: Install dependencies
run: sudo apt-get install -y libgoogle-perftools-dev libprotobuf-dev protobuf-compiler libboost-system-dev gcc-${{ env.gcc_version }} g++-${{ env.gcc_version }}

# https://software.intel.com/content/www/us/en/develop/articles/installing-intel-free-libs-and-python-apt-repo.html
- name: Install MKL
run: |
wget -qO- "https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB" | sudo apt-key add -
sudo sh -c "echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list"
sudo apt-get update -o Dir::Etc::sourcelist="/etc/apt/sources.list.d/intel-mkl.list"
sudo apt-get install -y --no-install-recommends intel-mkl-64bit-2020.0-088
# The script simplifies installation of different versions of CUDA
- name: Install CUDA
run: ./scripts/ci/install_cuda_ubuntu.sh ${{ env.cuda_version }}
Expand Down Expand Up @@ -156,15 +149,6 @@ jobs:
with:
submodules: recursive

- name: Download MKL
run: |
C:\msys64\usr\bin\wget.exe -nv https://romang.blob.core.windows.net/mariandev/ci/mkl-2020.1-windows-static.zip -O mkl.zip
Expand-Archive -Force mkl.zip ${{ github.workspace }}\mkl
# Set the MKLROOT environment variable so that CMake can find MKL.
# GITHUB_WORKSPACE is an environment variable available on all GitHub-hosted runners
echo "MKLROOT=$env:GITHUB_WORKSPACE/mkl" | Out-File -FilePath $env:GITHUB_ENV -Append
shell: pwsh

- name: Install CUDA
run: |
.\scripts\ci\install_cuda_windows.ps1 '${{ env.cuda_version }}'
Expand Down
10 changes: 0 additions & 10 deletions .github/workflows/ubuntu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,6 @@ jobs:
sudo apt-get install -y libgoogle-perftools-dev libprotobuf-dev protobuf-compiler libboost-system-dev \
gcc-${{ matrix.gcc }} g++-${{ matrix.gcc }}

# https://software.intel.com/content/www/us/en/develop/articles/installing-intel-free-libs-and-python-apt-repo.html
- name: Install MKL
run: |
wget -qO- "https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB" | sudo apt-key add -
sudo sh -c "echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list"
sudo apt-get update -o Dir::Etc::sourcelist="/etc/apt/sources.list.d/intel-mkl.list"
sudo apt-get install -y --no-install-recommends intel-mkl-64bit-2020.0-088
if: matrix.cpu == true

# The script simplifies installation of different versions of CUDA
- name: Install CUDA
run: ./scripts/ci/install_cuda_ubuntu.sh ${{ matrix.cuda }}
Expand Down Expand Up @@ -122,4 +113,3 @@ jobs:
./marian-scorer --version
./marian-server --version
./spm_encode --version

24 changes: 14 additions & 10 deletions .github/workflows/windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ on:
branches: [ master ]

env:
MKL_URL: "https://romang.blob.core.windows.net/mariandev/ci/mkl-2020.1-windows-static.zip"
BOOST_ROOT: "C:/hostedtoolcache/windows/Boost/1.72.0/x86_64"
BOOST_URL: "https://sourceforge.net/projects/boost/files/boost-binaries/1.72.0/boost_1_72_0-msvc-14.2-64.exe"

Expand All @@ -34,15 +33,6 @@ jobs:
with:
submodules: recursive

- name: Download MKL
run: |
# Wget retries downloading files and is faster than Invoke-WebRequest
C:\msys64\usr\bin\wget.exe -nv ${{ env.MKL_URL }} -O mkl.zip
Expand-Archive -Force mkl.zip ${{ github.workspace }}\mkl
# Set MKLROOT environment variable so that CMake can find MKL
echo "MKLROOT=${{ github.workspace }}\mkl" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
shell: powershell

- name: Install CUDA
run: |
.\scripts\ci\install_cuda_windows.ps1 "10.2"
Expand All @@ -51,9 +41,17 @@ jobs:
echo "$env:CUDA_PATH/bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
shell: powershell
if: matrix.gpu == true
# Cache boost install
- name: Cache Boost
id: cache-boost
uses: actions/cache@v3
with:
path: ${{ env.BOOST_ROOT }}
key: ${{ runner.os }}-${{ env.BOOST_URL }}

# Boost is no longer pre-installed on GitHub-hosted Windows runners
- name: Download Boost
if: ${{ steps.cache-boost.outputs.cache-hit != 'true' }}
run: |
Write-Host "Downloading Boost to ${{ env.BOOST_ROOT }}"
C:\msys64\usr\bin\wget.exe -nv "${{ env.BOOST_URL }}" -O "${{ github.workspace }}/boost.exe"
Expand All @@ -70,6 +68,7 @@ jobs:

# Windows CUDA builds use USE_NCCL=off due to compilation errors.
- name: Build Debug
id: build-debug
uses: lukka/run-cmake@v3
with:
buildDirectory: ${{ github.workspace }}/build/Debug
Expand All @@ -95,6 +94,11 @@ jobs:
# able to find sometimes.
if: matrix.gpu == true

- name: Cleanup Debug
if: steps.build-debug.conclusion == 'success'
working-directory: ${{ github.workspace }}/build/Debug
run: cmake --build . --target clean

# Windows CUDA builds use USE_NCCL=off due to compilation errors
- name: Build Release
uses: lukka/run-cmake@v3
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,6 @@
[submodule "src/3rd_party/simple-websocket-server"]
path = src/3rd_party/simple-websocket-server
url = https://github.com/marian-nmt/Simple-WebSocket-Server
[submodule "src/3rd_party/oneDNN"]
path = src/3rd_party/oneDNN
url = https://github.com/oneapi-src/oneDNN.git
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
## [Unreleased]

### Added
- oneDNN is used for GEMM on CPU.

### Fixed
- Multi-loss casts type to first loss-type before accumulation (aborted before due to missing cast)
Expand All @@ -29,6 +30,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- Faster LSH top-k search on CPU
- Updated intgemm to the latest upstream version
- Parameters in npz files are no longer implicitly assumed to be row-ordered. Non row-ordered parameters will result in an abort
- MKL is no longer used as a CPU backend.

## [1.11.0] - 2022-02-08

Expand Down
20 changes: 8 additions & 12 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ option(USE_CCACHE "Use ccache compiler cache (https://ccache.dev)" OFF)
option(USE_CUDNN "Use CUDNN library" OFF)
option(USE_DOXYGEN "Build documentation with Doxygen" ON)
option(USE_FBGEMM "Use FBGEMM" OFF)
option(USE_MKL "Compile with MKL support" ON)
option(USE_DNNL "Compile with oneDNN support" ON)
option(USE_MPI "Use MPI library" OFF)
option(USE_NCCL "Use NCCL library" ON)
option(USE_SENTENCEPIECE "Download and compile SentencePiece" ON)
Expand Down Expand Up @@ -84,6 +84,7 @@ endif()
# Set compilation flags
if(MSVC)
# These are used in src/CMakeLists.txt on a per-target basis
list(APPEND EXTRA_DEFINITIONS /DUNICODE /D_UNICODE)
list(APPEND ALL_WARNINGS /WX; /W4;)

# Disabled bogus warnings for CPU intrinsics and Protobuf:
Expand All @@ -105,7 +106,7 @@ if(MSVC)
set(INTRINSICS "/arch:AVX2")
# set(INTRINSICS "/arch:AVX512")
# /bigobj is necessary for expression_operators.cpp. See https://stackoverflow.com/questions/15110580/penalty-of-the-msvs-compiler-flag-bigobj
set(CMAKE_CXX_FLAGS "/EHsc /DWIN32 /D_WINDOWS /DUNICODE /D_UNICODE /D_CRT_NONSTDC_NO_WARNINGS /D_CRT_SECURE_NO_WARNINGS /bigobj ${DISABLE_GLOBALLY}")
set(CMAKE_CXX_FLAGS "/EHsc /DWIN32 /D_WINDOWS /D_CRT_NONSTDC_NO_WARNINGS /D_CRT_SECURE_NO_WARNINGS /bigobj ${DISABLE_GLOBALLY}")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} /MT /O2 ${INTRINSICS} /Zi /MP /GL /DNDEBUG")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} /MTd /Od /Ob0 ${INTRINSICS} /RTC1 /Zi /D_DEBUG")

Expand Down Expand Up @@ -509,6 +510,11 @@ if(COMPILE_CPU)
set(EXT_LIBS ${EXT_LIBS} intgemm) # Enable intgemm when compiling CPU
add_definitions(-DCOMPILE_CPU=1)
endif()

if(USE_DNNL)
set(EXT_LIBS ${EXT_LIBS} dnnl)
add_definitions(-DDNNL_FOUND=1)
endif(USE_DNNL)
if(USE_APPLE_ACCELERATE)
if(NOT APPLE)
message(FATAL_ERROR "FATAL ERROR: Apple Accelerate only works on macOS.")
Expand All @@ -520,15 +526,6 @@ if(COMPILE_CPU)
set(EXT_LIBS ${EXT_LIBS} "-framework Accelerate")
add_definitions(-DBLAS_FOUND=1)
else(USE_APPLE_ACCELERATE)
if(USE_MKL)
find_package(MKL)
endif(USE_MKL)
if(MKL_FOUND)
include_directories(${MKL_INCLUDE_DIR})
set(EXT_LIBS ${EXT_LIBS} ${MKL_LIBRARIES})
set(BLAS_FOUND TRUE)
add_definitions(-DBLAS_FOUND=1 -DMKL_FOUND=1)
else(MKL_FOUND)
set(BLAS_VENDOR "OpenBLAS")
find_package(BLAS)
if(BLAS_FOUND)
Expand All @@ -539,7 +536,6 @@ if(COMPILE_CPU)
add_definitions(-DBLAS_FOUND=1)
endif(CBLAS_FOUND)
endif(BLAS_FOUND)
endif(MKL_FOUND)
endif(USE_APPLE_ACCELERATE)
endif(COMPILE_CPU)

Expand Down
137 changes: 0 additions & 137 deletions cmake/FindMKL.cmake

This file was deleted.

2 changes: 1 addition & 1 deletion doc/operators.md
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ libraries containing device-specific optimisations. These libraries include:
- CBLAS / OpenBLAS
- FBGEMM
- INTGEMM
- MKL
- oneDNN
- GPU
- CUDA (cuBLAS)

Expand Down
21 changes: 20 additions & 1 deletion src/3rd_party/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,25 @@ if(COMPILE_CPU)
endif()
endif(COMPILE_CPU)

if(USE_DNNL)
# oneDNN
set(DNNL_BUILD_TESTS OFF CACHE BOOL "Build dnnl tests")
set(DNNL_BUILD_EXAMPLES OFF CACHE BOOL "Build dnnl examples")

set(DNNL_ENABLE_JIT_PROFILING OFF CACHE INTERNAL "" FORCE)
if(USE_STATIC_LIBS)
set(DNNL_LIBRARY_TYPE "STATIC" CACHE STRING "specifies whether oneDNN library should be SHARED or STATIC" FORCE)
endif(USE_STATIC_LIBS)

if(NOT USE_OPENMP)
set(DNNL_CPU_RUNTIME SEQ CACHE INTERNAL "" FORCE)
endif()

add_subdirectory(./oneDNN)
include_directories(./oneDNN/include)

endif(USE_DNNL)

if(USE_FBGEMM)
# @TODO: find out if this is somehow harmful. This is suppressing CMake warnings for CMAKE_SUPPRESS_DEVELOPER_WARNINGS
# meant to silence CMakeFiles of 3rd_party tools.
Expand Down Expand Up @@ -169,7 +188,7 @@ if(CUDA_FOUND)
endif(COMPILE_AMPERE)

# install nccl in ${CMAKE_BINARY_DIR}/local similar to /usr/local linux installation
# Using $(MAKE) instead of $CMAKE_MAKE_PROGRAM in order to make parallelization in NCCL compilation work with make -j16.
# Using $(MAKE) instead of $CMAKE_MAKE_PROGRAM in order to make parallelization in NCCL compilation work with make -j16.
# Apparently this does not get properly propagated otherwise and builds with only a single thread/process.
ExternalProject_Add(nccl_install
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/nccl
Expand Down
1 change: 1 addition & 0 deletions src/3rd_party/oneDNN
Submodule oneDNN added at 11fa74
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ set(MARIAN_SOURCES

add_library(marian STATIC ${MARIAN_SOURCES})

target_compile_options(marian PRIVATE ${ALL_WARNINGS})
target_compile_options(marian PRIVATE ${ALL_WARNINGS} ${EXTRA_DEFINITIONS})

# Generate git_revision.h to reflect current git revision information
# [https://stackoverflow.com/questions/1435953/how-can-i-pass-git-sha1-to-compiler-as-definition-using-cmake]
Expand Down
Loading