diff --git a/lib/THCUNN/LookupTable.cu b/lib/THCUNN/LookupTable.cu index 116639b4..1d457012 100644 --- a/lib/THCUNN/LookupTable.cu +++ b/lib/THCUNN/LookupTable.cu @@ -13,6 +13,7 @@ #include "THCHalf.h" #include "THCHalfAutoNumerics.cuh" #include "THCTensorSort.cuh" +#define FULL_MASK 0xffffffff const int WARP_SIZE = 32; @@ -29,7 +30,7 @@ __device__ __forceinline__ bool warpHasCollision(int val) #pragma unroll for (int i = 1; i <= 16; i++) { - dup |= (__shfl(val, (laneId + i) % 32) == val); + dup |= (__shfl_sync(FULL_MASK, val, (laneId + i) % 32) == val); } #else @@ -46,7 +47,7 @@ __device__ __forceinline__ bool warpHasCollision(int val) #endif - return __any(dup) != 0; + return __any_sync(FULL_MASK, dup) != 0; } template