diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml index f165597e..3f01875e 100644 --- a/.github/workflows/continuous-integration.yml +++ b/.github/workflows/continuous-integration.yml @@ -21,11 +21,11 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - container: ["alicevision/popsift-deps:cuda10.2-ubuntu18.04", "alicevision/popsift-deps:cuda11.8.0-ubuntu20.04", "alicevision/popsift-deps:cuda12.1.0-ubuntu22.04"] + container: ["alicevision/popsift-deps:cuda11.8.0-ubuntu20.04", "alicevision/popsift-deps:cuda12.1.0-ubuntu22.04"] build_tpe: ["Release", "Debug"] exclude: # excludes debug on this one as it has a segmentation fault during the compilation (!) - - container: "alicevision/popsift-deps:cuda12.1.0-ubuntu22.04" + - container: "alicevision/popsift-deps:cuda11.8.0-ubuntu20.04" build_tpe: "Debug" container: diff --git a/.travis.yml b/.travis.yml index dd24a21b..030cb775 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,4 @@ -dist: xenial +dist: jammy language: cpp compiler: gcc @@ -13,10 +13,8 @@ addons: env: matrix: - - CUDA_VERSION_MAJOR="8" CUDA_VERSION_MINOR="0" CUDA_PKG_LONGVERSION="${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}.61-1" CUDA_PKG_VERSION="${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR}" - - CUDA_VERSION_MAJOR="9" CUDA_VERSION_MINOR="2" CUDA_PKG_LONGVERSION="${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}.148-1" CUDA_PKG_VERSION="${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR}" - - CUDA_VERSION_MAJOR="10" CUDA_VERSION_MINOR="2" CUDA_PKG_LONGVERSION="${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}.89-1" CUDA_PKG_VERSION="${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR}" - - CUDA_VERSION_MAJOR="11" CUDA_VERSION_MINOR="0" CUDA_PKG_LONGVERSION="${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}.2-1" CUDA_PKG_VERSION="${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR}" + - CUDA_VERSION_MAJOR="11" CUDA_VERSION_MINOR="8" CUDA_PKG_LONGVERSION="${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}.0-1" 
CUDA_PKG_VERSION="${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR}" + - CUDA_VERSION_MAJOR="12" CUDA_VERSION_MINOR="5" CUDA_PKG_LONGVERSION="${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}.1-1" CUDA_PKG_VERSION="${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR}" global: @@ -34,7 +32,7 @@ env: - POPSIFT_APP_INSTALL_RELEASE=${POPSIFT_APP_BUILD_RELEASE}/install - POPSIFT_APP_INSTALL_DEBUG=${POPSIFT_APP_BUILD_DEBUG}/install # CMAKE - - CMAKE_URL="https://cmake.org/files/v3.13/cmake-3.13.5-Linux-x86_64.tar.gz" + - CMAKE_URL="https://github.com/Kitware/CMake/releases/download/v3.30.1/cmake-3.30.1-linux-x86_64.tar.gz" - CMAKE_ROOT=${TRAVIS_BUILD_DIR}/cmake - CMAKE_SOURCE=${CMAKE_ROOT}/source - CMAKE_INSTALL=${CMAKE_ROOT}/install @@ -57,30 +55,12 @@ before_install: fi install: - - UBUNTU_VERSION=ubuntu1604 - - > - if [ ${CUDA_VERSION_MAJOR} -lt 11 ]; then - CUDA_REPO_PKG=cuda-repo-${UBUNTU_VERSION}_${CUDA_PKG_LONGVERSION}_amd64.deb - wget http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/$CUDA_REPO_PKG - travis_retry sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/7fa2af80.pub - sudo dpkg -i $CUDA_REPO_PKG - rm ${CUDA_REPO_PKG} - travis_retry sudo apt-get -y update - # cuda > 10.0 changed cublas naming - if [ ${CUDA_VERSION_MAJOR} -lt 10 ]; then - CUBLAS_PKG=cuda-cublas-dev-$CUDA_PKG_VERSION - else - CUBLAS_PKG=libcublas-dev - fi - travis_retry sudo apt-get install -y --no-install-recommends --allow-unauthenticated cuda-core-$CUDA_PKG_VERSION cuda-cudart-dev-$CUDA_PKG_VERSION ${CUBLAS_PKG} cuda-curand-dev-$CUDA_PKG_VERSION - sudo ln -s /usr/local/cuda-${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} /usr/local/cuda - else - wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/cuda-ubuntu1604.pin - travis_retry sudo mv cuda-ubuntu1604.pin /etc/apt/preferences.d/cuda-repository-pin-600 - travis_retry sudo apt-key adv --fetch-keys 
http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub - travis_retry sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/ /" - sudo apt-get update && sudo apt-get -y install cuda - fi + - UBUNTU_VERSION=ubuntu2204 + - wget https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/cuda-${UBUNTU_VERSION}.pin + - travis_retry sudo mv cuda-${UBUNTU_VERSION}.pin /etc/apt/preferences.d/cuda-repository-pin-600 + - travis_retry sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/3bf863cc.pub + - travis_retry sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/ /" + - sudo apt-get update && sudo apt-get -y install cuda-toolkit-${CUDA_PKG_VERSION} # - CUDA_REPO_PKG=cuda-repo-${UBUNTU_VERSION}_${CUDA_PKG_LONGVERSION}_amd64.deb # - wget http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/$CUDA_REPO_PKG @@ -131,3 +111,4 @@ cache: apt: true directories: - ${CMAKE_INSTALL} + diff --git a/CHANGES.md b/CHANGES.md index 9608b25d..1cfc0b51 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -17,6 +17,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [1.0.0] - YYYY-MM-DD +## 2024 + +- CMake: CUDA as first-order language, different CC selection + ### Added - Improved checks for CUDA textures [PR](https://github.com/alicevision/popsift/pull/89) - CMake: Improved support for all Cuda CC [PR](https://github.com/alicevision/popsift/pull/75) diff --git a/CMakeLists.txt b/CMakeLists.txt index f39f2fec..a9c033cb 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,16 @@ -# CMake below 3.4 does not work with CUDA separable compilation at all -cmake_minimum_required(VERSION 3.12) +# First-order language CUDA requires at least CMake 3.18 +cmake_minimum_required(VERSION 3.24) -project(PopSift VERSION 1.0.0 LANGUAGES CXX)
+# Policy to support CUDA as a first-order language for CMake. +# Since CMake 3.18. See https://cmake.org/cmake/help/latest/policy/CMP0104.html +cmake_policy(SET CMP0104 NEW) + +# Cache the default before project() enables CUDA, and without FORCE, so that a user-supplied -DCMAKE_CUDA_ARCHITECTURES=... wins. +set(CMAKE_CUDA_ARCHITECTURES "all-major" + CACHE + STRING "Which CUDA CCs to support: native, all, all-major or an explicit list delimited by semicolons") + +project(PopSift VERSION 1.0.0 LANGUAGES CXX CUDA) # Set build path as a folder named as the platform (linux, windows, darwin...) plus the processor type set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") @@ -16,7 +25,6 @@ option(PopSift_USE_GRID_FILTER "Switch off grid filtering to massively reduce co option(PopSift_USE_NORMF "The __normf function computes Euclidean distance on large arrays. Fast but stability is uncertain." OFF) option(PopSift_NVCC_WARNINGS "Switch on several additional warning for CUDA nvcc" OFF) option(PopSift_USE_TEST_CMD "Add testing step for functional verification" OFF) -option(PopSift_NO_DEPRECATED_CUDA_SM_WARNINGS "Suppress warnings about soon to be deprecated cuda SM" ON) option(BUILD_SHARED_LIBS "Build shared libraries" ON) if(PopSift_USE_POSITION_INDEPENDENT_CODE AND NOT MSVC) @@ -55,17 +63,15 @@ include(GNUInstallDirs) if(BUILD_SHARED_LIBS) message(STATUS "BUILD_SHARED_LIBS ON") - # Need to declare CUDA_USE_STATIC_CUDA_RUNTIME as an option to ensure that it is not overwritten in FindCUDA.
- option(CUDA_USE_STATIC_CUDA_RUNTIME "Use the static version of the CUDA runtime library if available" OFF) - set(CUDA_USE_STATIC_CUDA_RUNTIME OFF) - # Workaround to force deactivation of cuda static runtime for cmake < 3.10 - set(CUDA_cudart_static_LIBRARY 0) + # Auto-build dll exports on Windows set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) + + set(CMAKE_CUDA_RUNTIME_LIBRARY Shared) else() message(STATUS "BUILD_SHARED_LIBS OFF") - option(CUDA_USE_STATIC_CUDA_RUNTIME "Use the static version of the CUDA runtime library if available" ON) - set(CUDA_USE_STATIC_CUDA_RUNTIME ON) + + set(CMAKE_CUDA_RUNTIME_LIBRARY Static) endif() # Require threads because of std::thread. @@ -74,26 +80,15 @@ find_package(Threads REQUIRED) ################### # CUDA ################### -find_package(CUDA 7.0 REQUIRED) +include(CheckLanguage) +check_language(CUDA) -if(NOT CUDA_FOUND) - message(FATAL_ERROR "Could not find CUDA >= 7.0") -endif() +# Use this if necessary: "cmake -DCUDAToolkit_ROOT=/some/path" +# target_link_libraries(binary_linking_to_cudart PRIVATE CUDA::cudart) +find_package(CUDAToolkit REQUIRED) -message(STATUS "CUDA Version is ${CUDA_VERSION}") - -include(ChooseCudaCC) -if(NOT DEFINED PopSift_CUDA_CC_LIST) - chooseCudaCC(PopSift_CUDA_CC_LIST_BASIC - PopSift_CUDA_GENCODE_FLAGS - MIN_CC 30 - MIN_CUDA_VERSION 7.0) - set(PopSift_CUDA_CC_LIST ${PopSift_CUDA_CC_LIST_BASIC} CACHE STRING "CUDA CC versions to compile") -else() - getFlagsForCudaCCList(PopSift_CUDA_CC_LIST - PopSift_CUDA_GENCODE_FLAGS) -endif() -list(APPEND CUDA_NVCC_FLAGS "${PopSift_CUDA_GENCODE_FLAGS}") +message(STATUS "CUDA Version is ${CUDAToolkit_VERSION}") +set(CUDA_VERSION ${CUDAToolkit_VERSION}) if(PopSift_USE_NVTX_PROFILING) message(STATUS "PROFILING CPU CODE: NVTX is in use") @@ -104,46 +99,8 @@ if(PopSift_ERRCHK_AFTER_KERNEL) list(APPEND CUDA_NVCC_FLAGS "-DERRCHK_AFTER_KERNEL") endif() -set(CUDA_SEPARABLE_COMPILATION ON) - -if(PopSift_NO_DEPRECATED_CUDA_SM_WARNINGS) - list(APPEND CUDA_NVCC_FLAGS
"-Wno-deprecated-gpu-targets") -endif() - -if(UNIX AND NOT APPLE) - list(APPEND CUDA_NVCC_FLAGS "-Xcompiler;-rdynamic") - # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-v") - # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-warn-double-usage") - list(APPEND CUDA_NVCC_FLAGS_DEBUG "--keep") - list(APPEND CUDA_NVCC_FLAGS_DEBUG "--source-in-ptx") -endif() - -# The following if should not be necessary, but apparently there is a bug in FindCUDA.cmake that -# generate an empty string in the nvcc command line causing the compilation to fail. -# see https://gitlab.kitware.com/cmake/cmake/issues/16411 -if(CMAKE_BUILD_TYPE STREQUAL "Debug") - message(STATUS "Building in debug mode") - list(APPEND CUDA_NVCC_FLAGS_DEBUG "-G") -endif() -list(APPEND CUDA_NVCC_FLAGS_RELEASE "-O3") - -if(PopSift_USE_POSITION_INDEPENDENT_CODE AND NOT MSVC) - list(APPEND CUDA_NVCC_FLAGS "-Xcompiler;-fPIC") -endif() - -# this is to ensure that on MSVC the flags for the linker are properly propagate even to the intermediate -# linking step. This seems not the case e.g. on vcpkg using ninja build. -if(MSVC) - if(BUILD_SHARED_LIBS) - set(PopSift_MVSC_LINKER "/MD") - else() - set(PopSift_MVSC_LINKER "/MT") - endif() - if(CMAKE_BUILD_TYPE STREQUAL "Debug") - set(PopSift_MVSC_LINKER "${PopSift_MVSC_LINKER}d") - endif() - list(APPEND CUDA_NVCC_FLAGS -Xcompiler ${PopSift_MVSC_LINKER}) -endif() +# This may not be required any more. 
+set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) # default stream per-thread implies that each host thread has one non-synchronizing 0-stream # currently, the code requires legacy mode @@ -159,7 +116,7 @@ if(CUDA_VERSION VERSION_GREATER_EQUAL "7.5") endif() endif() -set(PopSift_CXX_STANDARD 14) # Thrust/CUB requires C++14 starting with CUDA SDK 11 +set(PopSift_CXX_STANDARD 17) # Thrust/CUB requires C++14 starting with CUDA SDK 11 if(CUDA_VERSION_MAJOR LESS_EQUAL 8) set(PopSift_CXX_STANDARD 11) endif() @@ -181,9 +138,9 @@ else() endif() if(CUDA_VERSION VERSION_GREATER_EQUAL "9.0") - set(HAVE_SHFL_DOWN_SYNC 1) + set(PopSift_HAVE_SHFL_DOWN_SYNC 1) else() - set(HAVE_SHFL_DOWN_SYNC 0) + set(PopSift_HAVE_SHFL_DOWN_SYNC 0) endif() if(NOT PopSift_USE_GRID_FILTER) @@ -193,12 +150,8 @@ else() set(DISABLE_GRID_FILTER 0) endif() -# library required for CUDA dynamic parallelism, forgotten by CMake 3.4 -cuda_find_library_local_first(CUDA_CUDADEVRT_LIBRARY cudadevrt "\"cudadevrt\" library") - if(PopSift_USE_NVTX_PROFILING) # library required for NVTX profiling of the CPU - cuda_find_library_local_first(CUDA_NVTX_LIBRARY nvToolsExt "NVTX library") set(PopSift_USE_NVTX 1) else() set(PopSift_USE_NVTX 0) @@ -245,9 +198,14 @@ message(STATUS "Use CUDA NVTX for profiling: " ${PopSift_USE_NVTX_PROFILING}) message(STATUS "Synchronize and check CUDA error after every kernel: " ${PopSift_ERRCHK_AFTER_KERNEL}) message(STATUS "Grid filtering: " ${PopSift_USE_GRID_FILTER}) message(STATUS "Additional warning for CUDA nvcc: " ${PopSift_NVCC_WARNINGS}) -message(STATUS "Compiling for CUDA CCs: ${PopSift_CUDA_CC_LIST}") message(STATUS "Install path: " ${CMAKE_INSTALL_PREFIX}) message(STATUS "Testing step: " ${PopSift_USE_TEST_CMD}) + +message(STATUS "CMAKE_CUDA_COMPILER = ${CMAKE_CUDA_COMPILER}") +message(STATUS "CMAKE_CUDA_COMPILER_ID = ${CMAKE_CUDA_COMPILER_ID}") +message(STATUS "CMAKE_CUDA_COMPILER_VERSION = ${CMAKE_CUDA_COMPILER_VERSION}") +message(STATUS "CMAKE_CUDA_ARCHITECTURES = 
${CMAKE_CUDA_ARCHITECTURES}") + if(PopSift_USE_TEST_CMD) message(STATUS "Path for test input: " ${PopSift_TESTFILE_PATH}) endif() diff --git a/README.md b/README.md index 418d1278..738794c9 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ PopSift depends on: Optionally, for the provided applications: -* Boost >= 1.55 (required components {atomic, chrono, date-time, system, thread}-dev) +* Boost >= 1.71 (required components {atomic, chrono, date-time, system, thread}-dev) * DevIL (libdevil-dev) can be used to load a broader range of image formats, otherwise only pgm is supported. diff --git a/appveyor.yml b/appveyor.yml index dc822f2b..679ac5b5 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,6 +1,9 @@ +# +# Build system for the PopSift library, including its demo programs. +# version: '1.0.{build}' -image: Visual Studio 2015 +image: Visual Studio 2022 platform: - x64 @@ -14,6 +17,9 @@ configuration: # - DBUILD_SHARED_LIBS: 0 # - DBUILD_SHARED_LIBS: 1 +# +# Check the separate file cudaInstallAppveyor for the installation of CUDA +# install: - cmd: >- call cudaInstallAppveyor.cmd @@ -23,10 +29,22 @@ install: --triplet %PLATFORM%-windows # devil +# +# When updating to a new version of Visual Studio, change the generator string after +# -G and find the suitable toolset version that is listed after -T (v143 in this case). +# The CUDA Toolkit and the VS version must match. The matches are found in the CUDA +# documentation. +# The platform in this case is x64. Apparently, you need it after -T for VS and after -A +# for CUDA. +# You can only have one -T parameter, but you can separate several options with a comma. +# +# PopSift_USE_GRID_FILTER is off in this build because the installation of CUDA Thrust +# in cudaInstallAppveyor is not happening yet.
+# before_build: - md build - cd build - - cmake -G "Visual Studio 14 2015" -A x64 -T v140,host=x64 -DBUILD_SHARED_LIBS=%DBUILD_SHARED_LIBS% -DPopSift_BUILD_DOCS:BOOL=OFF -DPopSift_USE_POSITION_INDEPENDENT_CODE:BOOL=%DBUILD_SHARED_LIBS% -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. + - cmake -G "Visual Studio 17 2022" -A x64 -T v143,host=x64,cuda="%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" -DBUILD_SHARED_LIBS:BOOL=ON -DPopSift_USE_NVTX_PROFILING:BOOL=OFF -DPopSift_USE_GRID_FILTER:BOOL=OFF -DPopSift_BUILD_DOCS:BOOL=OFF -DPopSift_USE_POSITION_INDEPENDENT_CODE:BOOL=ON -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. - ls -l build: diff --git a/cmake/ChooseCudaCC.cmake b/cmake/ChooseCudaCC.cmake deleted file mode 100755 index aba4eb91..00000000 --- a/cmake/ChooseCudaCC.cmake +++ /dev/null @@ -1,184 +0,0 @@ -# -# This file contains two functions: -# chooseCudaCC -# getFlagsForCudaCCList -# -# Motivation: -# CUDA hardware and SDKs are developing over time, different SDK support different -# hardware, and supported hardware differs depending on platform even for the same -# SDK version. This file attempts to provide a function that returns a valid selection -# of hardware for the current SDK and platform. It will require updates as CUDA develops, -# and it is currently not complete in terms of existing platforms that support CUDA. -# - -# -# Return the minimal set of supported Cuda CC -# -# Usage: -# chooseCudaCC(SUPPORTED_CC SUPPORTED_GENCODE_FLAGS -# [MIN_CUDA_VERSION X.Y] -# [MIN_CC XX ]) -# -# SUPPORTED_CC out variable. Stores the list of supported CC. -# SUPPORTED_GENCODE_FLAGS out variable. List of gencode flags to append to, e.g., CUDA_NVCC_FLAGS -# MIN_CUDA_VERSION the minimal supported version of cuda (e.g. 7.5, default 7.0). 
-# MIN_CC minimal supported Cuda CC by the project (e.g. 35, default 20) -# -# This function does not edit cache entries or variables in the parent scope -# except for the variables whose names are supplied for SUPPORTED_CC and -# SUPPORTED_GENCODE_FLAGS -# -# You may want to cache SUPPORTED_CC and append SUPPORTED_GENCODE_FLAGS to -# CUDA_NVCC_FLAGS. -# Like this: -# set(MYCC ${MYCC} CACHE STRING "CUDA CC versions to compile") -# end -# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};${MY_GENCODE_FLAGS}") -# -function(chooseCudaCC SUPPORTED_CC SUPPORTED_GENCODE_FLAGS) - set(options "") - set(oneValueArgs MIN_CUDA_VERSION MIN_CC) - set(multipleValueArgs "") - cmake_parse_arguments(CHOOSE_CUDA "${options}" "${oneValueArgs}" "${multipleValueArgs}" ${ARGN}) - - if(NOT DEFINED CHOOSE_CUDA_MIN_CC) - set(CHOOSE_CUDA_MIN_CC 20) - endif() - if(NOT DEFINED CHOOSE_CUDA_MIN_CUDA_VERSION) - set(CHOOSE_CUDA_MIN_CUDA_VERSION 7.0) - endif() - - find_package(CUDA ${CHOOSE_CUDA_MIN_CUDA_VERSION} REQUIRED) - - if(NOT CUDA_FOUND) - message(FATAL_ERROR "Could not find CUDA >= ${CHOOSE_CUDA_MIN_CUDA_VERSION}") - endif() - - # - # Create a list of possible CCs for each host processor. - # This may require tuning: CUDA cards exist in AIX machines with POWER CPUs, - # it is possible that non-Tegra ARM systems exist as well. - # For now, this is my best guess. - # - set(TEGRA_SUPPORTED_PROCESSORS "armv71;arm;aarch64") - set(OTHER_SUPPORTED_PROCESSORS "i686;x86_64;AMD64") - - set(CC_LIST_BY_SYSTEM_PROCESSOR "") - if(CMAKE_SYSTEM_PROCESSOR IN_LIST OTHER_SUPPORTED_PROCESSORS) - list(APPEND CC_LIST_BY_SYSTEM_PROCESSOR "20;21;30;35;50;52;60;61;70;75;80;86;89;90") - endif() - if(CMAKE_SYSTEM_PROCESSOR IN_LIST TEGRA_SUPPORTED_PROCESSORS) - list(APPEND CC_LIST_BY_SYSTEM_PROCESSOR "32;53;62;72") - endif() - if(NOT CC_LIST_BY_SYSTEM_PROCESSOR) - message(FATAL_ERROR "Unknown how to build for ${CMAKE_SYSTEM_PROCESSOR}") - endif() - - # - # Default setting of the CUDA CC versions to compile. 
- # Shortening the lists saves a lot of compile time. - # - - # The current version last time this list was updated was CUDA 12.1. - if(CUDA_VERSION VERSION_GREATER_EQUAL 12) - set(CUDA_MIN_CC 50) - set(CUDA_MAX_CC 90) - elseif(CUDA_VERSION VERSION_GREATER_EQUAL 11.8) - set(CUDA_MIN_CC 35) - set(CUDA_MAX_CC 90) - elseif(CUDA_VERSION VERSION_GREATER_EQUAL 11.1) - set(CUDA_MIN_CC 35) - set(CUDA_MAX_CC 86) - elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 11) - set(CUDA_MIN_CC 35) - set(CUDA_MAX_CC 80) - elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 10) - set(CUDA_MIN_CC 30) - set(CUDA_MAX_CC 75) - elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 9) - set(CUDA_MIN_CC 30) - set(CUDA_MAX_CC 72) - elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 8) - set(CUDA_MIN_CC 20) - set(CUDA_MAX_CC 62) - elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 7) - set(CUDA_MIN_CC 20) - set(CUDA_MAX_CC 53) - else() - message(FATAL_ERROR "We do not support a CUDA SDK below version 7.0") - endif() - if(${CHOOSE_CUDA_MIN_CC} GREATER ${CUDA_MIN_CC}) - set(CUDA_MIN_CC ${CHOOSE_CUDA_MIN_CC}) - endif() - - set(CC_LIST "") - foreach(CC ${CC_LIST_BY_SYSTEM_PROCESSOR}) - if( (${CC} GREATER_EQUAL ${CUDA_MIN_CC}) AND - (${CC} LESS_EQUAL ${CUDA_MAX_CC}) ) - list(APPEND CC_LIST ${CC}) - endif() - endforeach() - - # - # Add all requested CUDA CCs to the command line for offline compilation - # - set(GENCODE_FLAGS "") - list(SORT CC_LIST) - foreach(CC_VERSION ${CC_LIST}) - list(APPEND GENCODE_FLAGS "-gencode;arch=compute_${CC_VERSION},code=sm_${CC_VERSION}") - endforeach() - - # - # Use the highest request CUDA CC for CUDA JIT compilation - # - list(LENGTH CC_LIST CC_LIST_LEN) - MATH(EXPR CC_LIST_LEN "${CC_LIST_LEN}-1") - list(GET CC_LIST ${CC_LIST_LEN} CC_LIST_LAST) - list(APPEND GENCODE_FLAGS "-gencode;arch=compute_${CC_LIST_LAST},code=compute_${CC_LIST_LAST}") - - # - # Two variables are exported to the parent scope. 
One is passed through the - # environment (CUDA_NVCC_FLAGS), the other is passed by name (SUPPORTED_CC) - # - set(${SUPPORTED_GENCODE_FLAGS} "${GENCODE_FLAGS}" PARENT_SCOPE) - set(${SUPPORTED_CC} "${CC_LIST}" PARENT_SCOPE) -endfunction() - -# -# Return the gencode parameters for a given list of CCs. -# -# Usage: -# getFlagsForCudaCCList(INPUT_CC_LIST SUPPORTED_GENCODE_FLAGS) -# -# INPUT_CC_LIST in variable. Contains a list of supported CCs. -# SUPPORTED_GENCODE_FLAGS out variable. List of gencode flags to append to, e.g., CUDA_NVCC_FLAGS -# -function(getFlagsForCudaCCList INPUT_CC_LIST SUPPORTED_GENCODE_FLAGS) - set(CC_LIST "${${INPUT_CC_LIST}}") - - # - # Add all requested CUDA CCs to the command line for offline compilation - # - set(GENCODE_FLAGS "") - list(SORT CC_LIST) - foreach(CC_VERSION ${CC_LIST}) - list(APPEND GENCODE_FLAGS "-gencode;arch=compute_${CC_VERSION},code=sm_${CC_VERSION}") - endforeach() - - # - # Use the highest request CUDA CC for CUDA JIT compilation - # - list(LENGTH CC_LIST CC_LIST_LEN) - MATH(EXPR CC_LIST_LEN "${CC_LIST_LEN}-1") - list(GET CC_LIST ${CC_LIST_LEN} CC_LIST_LAST) - list(APPEND GENCODE_FLAGS "-gencode;arch=compute_${CC_LIST_LAST},code=compute_${CC_LIST_LAST}") - - message(STATUS "Setting gencode flags: ${GENCODE_FLAGS}") - - # - # Two variables are exported to the parent scope. 
One is passed through the - # environment (CUDA_NVCC_FLAGS), the other is passed by name (SUPPORTED_CC) - # - set(${SUPPORTED_GENCODE_FLAGS} "${GENCODE_FLAGS}" PARENT_SCOPE) -endfunction() - diff --git a/cmake/Config.cmake.in b/cmake/Config.cmake.in index 30eea599..b8fce13a 100644 --- a/cmake/Config.cmake.in +++ b/cmake/Config.cmake.in @@ -40,6 +40,7 @@ include(CMakeFindDependencyMacro) find_dependency(Threads REQUIRED) +find_dependency(CUDAToolkit REQUIRED) include("${CMAKE_CURRENT_LIST_DIR}/@popsift_targets_export_name@.cmake") check_required_components("@PROJECT_NAME@") diff --git a/cmake/sift_config.h.in b/cmake/sift_config.h.in index 427cfe42..86095a55 100644 --- a/cmake/sift_config.h.in +++ b/cmake/sift_config.h.in @@ -8,11 +8,11 @@ #pragma once -#define POPSIFT_IS_DEFINED(F) F() == 1 - -#define POPSIFT_HAVE_SHFL_DOWN_SYNC() @HAVE_SHFL_DOWN_SYNC@ -#define POPSIFT_HAVE_NORMF() @PopSift_HAVE_NORMF@ -#define POPSIFT_DISABLE_GRID_FILTER() @DISABLE_GRID_FILTER@ -#define POPSIFT_USE_NVTX() @PopSift_USE_NVTX@ +#define POPSIFT_IS_DEFINED(F) (F() == 1) +#define POPSIFT_IS_UNDEFINED(F) (F() == 0) +#define POPSIFT_HAVE_SHFL_DOWN_SYNC() @PopSift_HAVE_SHFL_DOWN_SYNC@ +#define POPSIFT_HAVE_NORMF() @PopSift_HAVE_NORMF@ +#define POPSIFT_DISABLE_GRID_FILTER() @DISABLE_GRID_FILTER@ +#define POPSIFT_USE_NVTX() @PopSift_USE_NVTX@ diff --git a/cudaInstallAppveyor.cmd b/cudaInstallAppveyor.cmd index 381d4048..9d43f7fa 100644 --- a/cudaInstallAppveyor.cmd +++ b/cudaInstallAppveyor.cmd @@ -1,19 +1,44 @@ @echo off -echo Downloading CUDA toolkit 9 -appveyor DownloadFile https://developer.nvidia.com/compute/cuda/9.1/Prod/local_installers/cuda_9.1.85_windows -FileName cuda_9.1.85_windows.exe +echo Downloading CUDA toolkit 12 for Windows 10 +REM appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/12.5.1/network_installers/cuda_12.5.1_windows_network.exe -Filename cuda_12.5.1_windows.exe + +appveyor DownloadFile
https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.5.82-archive.zip -Filename cuda_nvcc.zip +appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.5.82-archive.zip -Filename cuda_cudart.zip +appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.5.82-archive.zip -Filename cuda_nvtx.zip +appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.5.82-archive.zip -Filename vs_integration.zip +dir + +echo Unzipping CUDA toolkit 12 +tar -xf cuda_nvcc.zip +tar -xf cuda_cudart.zip +tar -xf cuda_nvtx.zip +tar -xf vs_integration.zip dir -echo Installing CUDA toolkit 9 -cuda_9.1.85_windows.exe -s nvcc_9.1 ^ - cublas_9.1 ^ - cublas_dev_9.1 ^ - cudart_9.1 ^ - curand_9.1 ^ - curand_dev_9.1 -echo CUDA toolkit 9 installed +echo Making CUDA install dir(s) +mkdir "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" +mkdir "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5\extras" + +echo Copying toolkit files to install dir(s) +xcopy cuda_cudart-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" /s /e /i /y +xcopy cuda_nvcc-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" /s /e /i /y +xcopy cuda_nvtx-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" /s /e /i /y +xcopy visual_studio_integration-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5\extras" /s /e /i /y + + +REM echo Installing CUDA toolkit 12 +REM cuda_12.5.1_windows.exe +REM cuda_9.1.85_windows.exe -s nvcc_12.5 cudart_12.5 + + +echo CUDA toolkit 12 installed dir "%ProgramFiles%" -set PATH=%ProgramFiles%\NVIDIA GPU Computing
Toolkit\CUDA\v9.1\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v9.1\libnvvp;%PATH% +set PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5\libnvvp;%PATH% + +dir "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA" +dir "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" +dir "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5\bin" -nvcc -V \ No newline at end of file +nvcc -V diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0380dd41..ff3b3681 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,6 +1,7 @@ +# Do not specify SHARED or STATIC in add_library. Let the variable BUILD_SHARED_LIBS determine this. -CUDA_ADD_LIBRARY(popsift - popsift/popsift.cpp popsift/popsift.h +add_library(popsift + popsift/popsift.cu popsift/popsift.h popsift/features.cu popsift/features.h popsift/sift_constants.cu popsift/sift_constants.h popsift/sift_conf.cu popsift/sift_conf.h @@ -39,28 +40,35 @@ CUDA_ADD_LIBRARY(popsift popsift/common/vec_macros.h popsift/common/clamp.h) +target_link_libraries(popsift + PUBLIC + CUDA::cudart + Threads::Threads) + +if(PopSift_USE_NVTX_PROFILING) +target_link_libraries(popsift + PUBLIC + CUDA::nvtx3) +endif() + +set_target_properties(popsift PROPERTIES VERSION ${PROJECT_VERSION}) +set_target_properties(popsift PROPERTIES DEBUG_POSTFIX "d") +set_target_properties(popsift PROPERTIES CUDA_SEPARABLE_COMPILATION ON) # build directory containing the automatically generated files set(popsift_generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated") # BUILD_INTERFACE allows to include the directory with source only when target is # built in the building tree (ie, not from an install location) +# The CUDA install dir variable has changed from the old CUDA_INCLUDE_DIRS to the new CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES target_include_directories(popsift PUBLIC $ $ $ $ - ${CUDA_INCLUDE_DIRS}) - - -set_target_properties(popsift PROPERTIES VERSION ${PROJECT_VERSION}) 
-set_target_properties(popsift PROPERTIES DEBUG_POSTFIX "d") - -# cannot use PRIVATE here as there is a bug in FindCUDA and CUDA_ADD_LIBRARY -# https://gitlab.kitware.com/cmake/cmake/issues/16097 -target_link_libraries(popsift ${CUDA_CUDADEVRT_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} Threads::Threads) - + ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} + ) # EXPORTING THE LIBRARY # diff --git a/src/application/CMakeLists.txt b/src/application/CMakeLists.txt index 3b28cec8..2379c57d 100755 --- a/src/application/CMakeLists.txt +++ b/src/application/CMakeLists.txt @@ -42,7 +42,7 @@ find_package(DevIL COMPONENTS IL ILU) # yields IL_FOUND, IL_LIBRARIES, IL_INCLUD if(PopSift_BOOST_USE_STATIC_LIBS) set(Boost_USE_STATIC_LIBS ON) endif() -find_package(Boost 1.53.0 REQUIRED COMPONENTS filesystem program_options system) +find_package(Boost 1.71.0 REQUIRED COMPONENTS filesystem program_options system) if(WIN32) add_definitions("-DBOOST_ALL_NO_LIB") endif(WIN32) @@ -60,10 +60,6 @@ else() set(PD_COMPILE_OPTIONS "" ) endif() -if(PopSift_USE_NVTX_PROFILING) - list(APPEND PD_LINK_LIBS ${CUDA_NVTX_LIBRARY}) -endif(PopSift_USE_NVTX_PROFILING) - ############################################################# # popsift-demo ############################################################# @@ -73,7 +69,7 @@ add_executable(popsift-demo main.cpp pgmread.cpp pgmread.h) set_property(TARGET popsift-demo PROPERTY CXX_STANDARD 11) target_compile_options(popsift-demo PRIVATE ${PD_COMPILE_OPTIONS} ) -target_include_directories(popsift-demo PUBLIC ${PD_INCLUDE_DIRS}) +target_include_directories(popsift-demo PRIVATE ${PD_INCLUDE_DIRS}) target_compile_definitions(popsift-demo PRIVATE ${Boost_DEFINITIONS}) target_link_libraries(popsift-demo PUBLIC PopSift::popsift ${PD_LINK_LIBS}) @@ -87,7 +83,7 @@ add_executable(popsift-match match.cpp pgmread.cpp pgmread.h) set_property(TARGET popsift-match PROPERTY CXX_STANDARD 11) target_compile_options(popsift-match PRIVATE ${PD_COMPILE_OPTIONS} )
-target_include_directories(popsift-match PUBLIC ${PD_INCLUDE_DIRS}) +target_include_directories(popsift-match PRIVATE ${PD_INCLUDE_DIRS}) target_compile_definitions(popsift-match PRIVATE ${Boost_DEFINITIONS}) target_link_libraries(popsift-match PUBLIC PopSift::popsift ${PD_LINK_LIBS}) diff --git a/src/application/main.cpp b/src/application/main.cpp index 0eec1c22..bf1128ff 100755 --- a/src/application/main.cpp +++ b/src/application/main.cpp @@ -30,13 +30,6 @@ #endif #include "pgmread.h" -#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) -#include -#else -#define nvtxRangePushA(a) -#define nvtxRangePop() -#endif - using namespace std; static bool print_dev_info = false; @@ -183,8 +176,6 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift ) exit( -1 ); } - nvtxRangePushA( "load and convert image - devil" ); - ilImage img; if( img.Load( inputFile.c_str() ) == false ) { cerr << "Could not load image " << inputFile << endl; @@ -200,8 +191,6 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift ) image_data = img.GetData(); - nvtxRangePop( ); // "load and convert image - devil" - job = PopSift.enqueue( w, h, image_data ); img.Clear(); @@ -209,7 +198,6 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift ) else #endif { - nvtxRangePushA( "load and convert image - pgmread" ); int w{}; int h{}; image_data = readPGMfile( inputFile, w, h ); @@ -217,8 +205,6 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift ) exit( EXIT_FAILURE ); } - nvtxRangePop( ); // "load and convert image - pgmread" - if( !
float_mode ) { // PopSift.init( w, h ); @@ -251,21 +237,15 @@ void read_job( SiftJob* job, bool really_write ) << endl; if( really_write ) { - nvtxRangePushA( "Writing features to disk" ); - std::ofstream of( "output-features.txt" ); feature_list->print( of, write_as_uchar ); } delete feature_list; - - if( really_write ) { - nvtxRangePop( ); // Writing features to disk - } } int main(int argc, char **argv) { - cudaDeviceReset(); + popsift::cuda::reset(); popsift::Config config; list inputFiles; diff --git a/src/application/match.cpp b/src/application/match.cpp index 852d9b62..3460975d 100755 --- a/src/application/match.cpp +++ b/src/application/match.cpp @@ -30,13 +30,6 @@ #endif #include "pgmread.h" -#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) -#include -#else -#define nvtxRangePushA(a) -#define nvtxRangePop() -#endif - using namespace std; static bool print_dev_info {false}; @@ -171,7 +164,6 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift ) unsigned char* image_data; SiftJob* job; - nvtxRangePushA( "load and convert image" ); #ifdef USE_DEVIL if( ! 
pgmread_loading ) { @@ -189,8 +181,6 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift ) cout << "Loading " << w << " x " << h << " image " << inputFile << endl; image_data = img.GetData(); - nvtxRangePop( ); - // PopSift.init( w, h ); job = PopSift.enqueue( w, h, image_data ); @@ -206,8 +196,6 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift ) exit( EXIT_FAILURE ); } - nvtxRangePop( ); - // PopSift.init( w, h ); job = PopSift.enqueue( w, h, image_data ); @@ -219,7 +207,7 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift ) int main(int argc, char **argv) { - cudaDeviceReset(); + popsift::cuda::reset(); popsift::Config config; string lFile{}; diff --git a/src/popsift/common/device_prop.cu b/src/popsift/common/device_prop.cu index a55821cd..44d47c1d 100644 --- a/src/popsift/common/device_prop.cu +++ b/src/popsift/common/device_prop.cu @@ -14,6 +14,16 @@ namespace popsift { namespace cuda { using namespace std; +void reset() +{ + cudaDeviceReset(); +} + +void sync() +{ + cudaDeviceSynchronize(); +} + device_prop_t::device_prop_t( ) { int currentDevice; diff --git a/src/popsift/common/device_prop.h b/src/popsift/common/device_prop.h index 7a0b142d..8910e9c8 100644 --- a/src/popsift/common/device_prop.h +++ b/src/popsift/common/device_prop.h @@ -13,6 +13,15 @@ namespace popsift { namespace cuda { +/** A call to cudaDeviceReset() + */ +void reset(); + +/** A call to cudaDeviceSynchronize() + */ +void sync(); + + /** * @brief A class to recover, query and print the information about the cuda device. 
*/ diff --git a/src/popsift/popsift.cpp b/src/popsift/popsift.cu similarity index 100% rename from src/popsift/popsift.cpp rename to src/popsift/popsift.cu diff --git a/src/popsift/popsift.h b/src/popsift/popsift.h index 3b5f72b8..5654cc76 100755 --- a/src/popsift/popsift.h +++ b/src/popsift/popsift.h @@ -24,7 +24,7 @@ #include #if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) -#include +#include #else #define nvtxRangeStartA(a) #define nvtxRangeEnd(a) diff --git a/src/popsift/s_filtergrid.cu b/src/popsift/s_filtergrid.cu index a766c2de..bfe2e64e 100644 --- a/src/popsift/s_filtergrid.cu +++ b/src/popsift/s_filtergrid.cu @@ -10,7 +10,7 @@ #include "sift_pyramid.h" #if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) -#include +#include #else #define nvtxRangePushA(a) #define nvtxRangePop() diff --git a/src/popsift/s_image.cu b/src/popsift/s_image.cu index f26b8e3e..a966dd39 100755 --- a/src/popsift/s_image.cu +++ b/src/popsift/s_image.cu @@ -16,7 +16,7 @@ #include #if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) -#include +#include #else #define nvtxRangePushA(a) #define nvtxRangePop() diff --git a/src/popsift/s_orientation.cu b/src/popsift/s_orientation.cu index f6b36fcd..b34aaaa1 100644 --- a/src/popsift/s_orientation.cu +++ b/src/popsift/s_orientation.cu @@ -19,7 +19,7 @@ #include #if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) -#include +#include #else #define nvtxRangePushA(a) #define nvtxRangePop() diff --git a/src/popsift/sift_constants.h b/src/popsift/sift_constants.h index 20b3012d..883515a7 100755 --- a/src/popsift/sift_constants.h +++ b/src/popsift/sift_constants.h @@ -15,20 +15,27 @@ #ifndef NINF #define NINF (-INF) #endif -#ifdef M_PI -#undef M_PI -// #define M_PI 3.14159265358979323846f -#endif -__device__ static const -float M_PI = 3.14159265358979323846f; -#ifdef M_PI2 -#undef M_PI2 -// #define M_PI2 (2.0F * M_PI) -#endif -__device__ static const -float M_PI2 = 2.0f * 3.14159265358979323846f; -#define M_4RPI (4.0f / M_PI) +#undef USE_CONSTANT_PI +#ifdef USE_CONSTANT_PI + #ifdef M_PI + 
#undef M_PI + #endif + __device__ static const float M_PI = 3.14159265358979323846f; + #ifdef M_PI2 + #undef M_PI2 + #endif + __device__ static const float M_PI2 = 2.0f * 3.14159265358979323846f; + #define M_4RPI (4.0f / M_PI) +#else + #ifndef M_PI + #define M_PI 3.14159265358979323846f + #endif + #ifndef M_PI2 + #define M_PI2 (2.0F * M_PI) + #endif + #define M_4RPI (4.0f / M_PI) +#endif #define DESC_MIN_FLOAT 1E-15F diff --git a/src/popsift/sift_desc.cu b/src/popsift/sift_desc.cu index b0eb0bd1..f533df35 100644 --- a/src/popsift/sift_desc.cu +++ b/src/popsift/sift_desc.cu @@ -22,7 +22,7 @@ #include #if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) -#include +#include #else #define nvtxRangePushA(a) #define nvtxRangePop() diff --git a/src/popsift/sift_pyramid.cu b/src/popsift/sift_pyramid.cu index 06060052..c03b0d61 100644 --- a/src/popsift/sift_pyramid.cu +++ b/src/popsift/sift_pyramid.cu @@ -26,7 +26,7 @@ #endif #if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) -#include +#include #else #define nvtxRangePushA(a) #define nvtxRangePop()