Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New Apple Accelerate #74

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,6 @@ them. See also `blas_int` above.

blas_return_float_f2c
Whether sdot, etc., returns float (usual convention) or double
(f2c convention used in CLAPACK and macOS Accelerate).
no returns float. Default except for macOS Accelerate.
yes returns double. Default for macOS Accelerate.
(f2c convention used in CLAPACK and old macOS Accelerate before macOS 13.3).
no returns float. Default except for old macOS Accelerate.
yes returns double. Default for old macOS Accelerate.
5 changes: 3 additions & 2 deletions cmake/BLASConfig.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,10 @@ set( found false )
if (NOT found)
if ("${BLAS_LIBRARIES}" MATCHES "-framework Accelerate|Accelerate.framework")
message( "${blue} Accelerate framework${plain}" )
list( APPEND blaspp_defs_ "-DBLAS_HAVE_ACCELERATE" )
#list( APPEND blaspp_defs_ "-DBLAS_HAVE_ACCELERATE" )
set( found true )
if (NOT DEFINED blas_return_float_f2c)
if (NOT DEFINED blas_return_float_f2c
AND NOT blaspp_defs_ MATCHES "ACCELERATE_NEW_LAPACK")
set( blas_return_float_f2c true )
endif()
endif()
Expand Down
70 changes: 59 additions & 11 deletions cmake/BLASFinder.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,14 @@ function( debug_print_list msg )
message( "---------- lists: ${msg}" )
message( "blas_name_list = ${blas_name_list}" )
message( "blas_libs_list = ${blas_libs_list}" )
message( "blas_defs_list = ${blas_defs_list}" )

message( "\nrow; ${red}blas_name;${plain} blas_libs" )
set( i 0 )
foreach (name IN LISTS blas_name_list)
list( GET blas_libs_list ${i} libs )
message( "${i}; ${red}${name};${plain} ${libs}" )
list( GET blas_defs_list ${i} defs )
message( "${i}; ${red}${name};${plain} libs = ${libs} defs = ${defs}" )
math( EXPR i "${i} + 1" )
endforeach()
message( "" )
Expand Down Expand Up @@ -119,12 +121,6 @@ else()
)
endif()

#---------------------------------------- integer sizes to test
set( int_size_list
" " # int (LP64)
"-DBLAS_ILP64" # int64_t (ILP64)
)

#-------------------------------------------------------------------------------
# Parse options: BLAS_LIBRARIES, blas, blas_int, blas_threaded, blas_fortran.

Expand Down Expand Up @@ -223,11 +219,20 @@ if (CMAKE_CROSSCOMPILING AND test_int AND test_int64)
" `blas_int=int64` (ilp64 convention).${plain}" )
endif()

# Build the list of integer-size compile definitions to try, restricted to
# the sizes selected by test_int and test_int64.
# The LP64 entry is a single space, i.e., no extra definition; later checks
# treat a value with no non-space characters as empty.
set( int_size_list "" )
if (test_int)
list( APPEND int_size_list " " ) # int (LP64)
endif()
if (test_int64)
list( APPEND int_size_list "-DBLAS_ILP64" ) # int64_t (ILP64)
endif()

message( DEBUG "
blas_int = '${blas_int}'
blas_int_ = '${blas_int_}'
test_int = '${test_int}'
test_int64 = '${test_int64}'")
test_int64 = '${test_int64}'
int_size_list = '${int_size_list}'")

#---------------------------------------- blas_threaded
string( TOLOWER "${blas_threaded}" blas_threaded_ )
Expand Down Expand Up @@ -256,6 +261,7 @@ test_sequential = '${test_sequential}'")

set( blas_name_list "" )
set( blas_libs_list "" )
set( blas_defs_list "" )

#---------------------------------------- BLAS_LIBRARIES
if (test_blas_libraries)
Expand All @@ -266,13 +272,15 @@ if (test_blas_libraries)

list( APPEND blas_name_list "\$BLAS_LIBRARIES" )
list( APPEND blas_libs_list "${BLAS_LIBRARIES_ESC}" )
list( APPEND blas_defs_list " " )
debug_print_list( "BLAS_LIBRARIES" )
endif()

#---------------------------------------- default; Cray libsci
if (test_all OR test_default)
list( APPEND blas_name_list "default (no library)" )
list( APPEND blas_libs_list " " ) # Use space so APPEND works later.
list( APPEND blas_defs_list " " )
debug_print_list( "default" )
endif()

Expand All @@ -285,33 +293,39 @@ if (test_all OR test_mkl)
if (test_int)
list( APPEND blas_name_list "Intel MKL lp64, GNU threads (gomp), gfortran")
list( APPEND blas_libs_list "-lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core" )
list( APPEND blas_defs_list " " )
endif()

if (test_int64)
list( APPEND blas_name_list "Intel MKL ilp64, GNU threads (gomp), gfortran")
list( APPEND blas_libs_list "-lmkl_gf_ilp64 -lmkl_gnu_thread -lmkl_core" )
list( APPEND blas_defs_list " " )
endif()

elseif (test_ifort AND intelllvm_compiler)
# IntelLLVM compiler + OpenMP: require intel_thread library.
# Add the LP64 and ILP64 candidates independently, as the other MKL
# branches do. With `elseif`, the ILP64 candidate was never added when
# both test_int and test_int64 were enabled.
# Each candidate appends one entry to all three parallel lists
# (name, libs, defs) so their indices stay aligned.
if (test_int)
    list( APPEND blas_name_list "Intel MKL lp64, Intel threads (iomp5), ifort")
    list( APPEND blas_libs_list "-lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core" )
    list( APPEND blas_defs_list " " )
endif()

if (test_int64)
    list( APPEND blas_name_list "Intel MKL ilp64, Intel threads (iomp5), ifort")
    list( APPEND blas_libs_list "-lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core" )
    list( APPEND blas_defs_list " " )
endif()

elseif (test_ifort AND intel_compiler)
# Intel compiler + OpenMP: require intel_thread library.
if (test_int)
list( APPEND blas_name_list "Intel MKL lp64, Intel threads (iomp5), ifort")
list( APPEND blas_libs_list "-lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core" )
list( APPEND blas_defs_list " " )
endif()

if (test_int64)
list( APPEND blas_name_list "Intel MKL ilp64, Intel threads (iomp5), ifort")
list( APPEND blas_libs_list "-lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core" )
list( APPEND blas_defs_list " " )
endif()

else()
Expand All @@ -327,11 +341,13 @@ if (test_all OR test_mkl)
if (test_int)
list( APPEND blas_name_list "Intel MKL lp64, sequential, ifort" )
list( APPEND blas_libs_list "-lmkl_intel_lp64 -lmkl_sequential -lmkl_core" )
list( APPEND blas_defs_list " " )
endif()

if (test_int64)
list( APPEND blas_name_list "Intel MKL ilp64, sequential, ifort" )
list( APPEND blas_libs_list "-lmkl_intel_ilp64 -lmkl_sequential -lmkl_core" )
list( APPEND blas_defs_list " " )
endif()
endif() # ifort

Expand All @@ -340,11 +356,13 @@ if (test_all OR test_mkl)
if (test_int)
list( APPEND blas_name_list "Intel MKL lp64, sequential, gfortran" )
list( APPEND blas_libs_list "-lmkl_gf_lp64 -lmkl_sequential -lmkl_core" )
list( APPEND blas_defs_list " " )
endif()

if (test_int64)
list( APPEND blas_name_list "Intel MKL ilp64, sequential, gfortran" )
list( APPEND blas_libs_list "-lmkl_gf_ilp64 -lmkl_sequential -lmkl_core" )
list( APPEND blas_defs_list " " )
endif()
endif() # gfortran

Expand All @@ -354,11 +372,13 @@ if (test_all OR test_mkl)
if (test_int)
list( APPEND blas_name_list "Intel MKL lp64, sequential, ifort" )
list( APPEND blas_libs_list "-lmkl_intel_lp64 -lmkl_sequential -lmkl_core" )
list( APPEND blas_defs_list " " )
endif()

if (test_int64)
list( APPEND blas_name_list "Intel MKL ilp64, sequential, ifort" )
list( APPEND blas_libs_list "-lmkl_intel_ilp64 -lmkl_sequential -lmkl_core" )
list( APPEND blas_defs_list " " )
endif()
endif() # ifort && not intel
endif() # sequential
Expand Down Expand Up @@ -387,11 +407,13 @@ if (test_all OR test_essl)
if (test_int)
list( APPEND blas_name_list "IBM ESSL int (lp64), multi-threaded, with OpenMP" )
list( APPEND blas_libs_list "-lesslsmp" )
list( APPEND blas_defs_list " " )
endif()

if (test_int64)
list( APPEND blas_name_list "IBM ESSL int64 (ilp64), multi-threaded, with OpenMP" )
list( APPEND blas_libs_list "-lesslsmp6464" )
list( APPEND blas_defs_list " " )
endif()
endif()
endif() # threaded
Expand All @@ -400,11 +422,13 @@ if (test_all OR test_essl)
if (test_int)
list( APPEND blas_name_list "IBM ESSL int (lp64), sequential" )
list( APPEND blas_libs_list "-lessl" )
list( APPEND blas_defs_list " " )
endif()

if (test_int64)
list( APPEND blas_name_list "IBM ESSL int64 (ilp64), sequential" )
list( APPEND blas_libs_list "-lessl6464" )
list( APPEND blas_defs_list " " )
endif()
endif() # sequential
debug_print_list( "essl" )
Expand All @@ -415,20 +439,28 @@ if (test_all OR test_openblas)
# todo: OPENBLAS_?(ROOT|DIR)
list( APPEND blas_name_list "OpenBLAS" )
list( APPEND blas_libs_list "-lopenblas" )
list( APPEND blas_defs_list " " )
debug_print_list( "openblas" )
endif()

#---------------------------------------- Apple Accelerate
if (test_all OR test_accelerate)
list( APPEND blas_name_list "Apple Accelerate" )
list( APPEND blas_name_list "Apple Accelerate (new)" )
list( APPEND blas_libs_list "-framework Accelerate" )
list( APPEND blas_defs_list "-DBLAS_HAVE_ACCELERATE -DACCELERATE_NEW_LAPACK" )

list( APPEND blas_name_list "Apple Accelerate (old, pre 13.3)" )
list( APPEND blas_libs_list "-framework Accelerate" )
list( APPEND blas_defs_list "-DBLAS_HAVE_ACCELERATE" )

debug_print_list( "accelerate" )
endif()

#---------------------------------------- generic -lblas
if (test_all OR test_generic)
list( APPEND blas_name_list "generic" )
list( APPEND blas_libs_list "-lblas" )
list( APPEND blas_defs_list " " )
debug_print_list( "generic" )
endif()

Expand All @@ -439,11 +471,13 @@ if (test_all OR test_acml)
if (test_threaded)
list( APPEND blas_name_list "AMD ACML threaded" )
list( APPEND blas_libs_list "-lacml_mp" )
list( APPEND blas_defs_list " " )
endif()

if (test_sequential)
list( APPEND blas_name_list "AMD ACML sequential" )
list( APPEND blas_libs_list "-lacml" )
list( APPEND blas_defs_list " " )
endif()
debug_print_list( "acml" )
endif()
Expand All @@ -458,14 +492,19 @@ set( i 0 )
foreach (blas_name IN LISTS blas_name_list)
message( TRACE "i: ${i}" )
list( GET blas_libs_list ${i} blas_libs )
list( GET blas_defs_list ${i} blas_defs )
math( EXPR i "${i}+1" )

if (i GREATER 1)
message( "" )
endif()
message( "${blas_name}" )
message( " libs: ${blas_libs}" )
# Print the extra compile definitions for this candidate only when there
# are any. The placeholder for "none" is a single space, so test for a
# non-space character. This must test blas_defs (fetched for this
# candidate); `defs` is not set in this scope, so the old condition
# compared the literal string "defs" and was always true.
if (blas_defs MATCHES "[^ ]") # non-empty
    message( " defs: ${blas_defs}" )
endif()

# Split space-separated libs into CMake list.
# Strip to deal with default lib being space, " ".
# Undo escaping \; semi-colons and split on spaces to make list.
# But keep '-framework Accelerate' together as one item.
Expand All @@ -488,7 +527,7 @@ foreach (blas_name IN LISTS blas_name_list)
LINK_LIBRARIES
${blas_libs} ${openmp_lib} # not "..." quoted; screws up OpenMP
COMPILE_DEFINITIONS
"${mangling} ${int_size}"
"${mangling} ${int_size} ${blas_defs}"
OUTPUT_VARIABLE
link_output
)
Expand All @@ -509,7 +548,7 @@ foreach (blas_name IN LISTS blas_name_list)
LINK_LIBRARIES
${blas_libs} ${openmp_lib} # not "..." quoted; screws up OpenMP
COMPILE_DEFINITIONS
"${mangling} ${int_size}"
"${mangling} ${int_size} ${blas_defs}"
COMPILE_OUTPUT_VARIABLE
compile_output
RUN_OUTPUT_VARIABLE
Expand All @@ -533,6 +572,12 @@ foreach (blas_name IN LISTS blas_name_list)
# If it runs and prints ok, we're done, so break all 3 loops.
message( "${label} ${blue} yes${plain}" )

# Split space-separated defs into CMake list.
message( DEBUG " blas_defs: '${blas_defs}'" )
string( STRIP "${blas_defs}" blas_defs )
string( REGEX REPLACE "([^ ])( +|\\\;)" "\\1;" blas_defs "${blas_defs}" )
message( DEBUG " blas_defs: '${blas_defs}' (split)" )

set( BLAS_FOUND true CACHE INTERNAL "" )
set( BLAS_LIBRARIES "${blas_libs}" CACHE STRING "" FORCE )
if (mangling MATCHES "[^ ]") # non-empty
Expand All @@ -541,6 +586,9 @@ foreach (blas_name IN LISTS blas_name_list)
if (int_size MATCHES "[^ ]") # non-empty
list( APPEND blaspp_defs_ "${int_size}" )
endif()
if (blas_defs MATCHES "[^ ]") # non-empty
list( APPEND blaspp_defs_ "${blas_defs}" )
endif()
break()
else()
message( "${label} ${red} no (didn't run: int mismatch, etc.)${plain}" )
Expand Down
32 changes: 25 additions & 7 deletions config/blas.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,23 @@
#include "config.h"

//------------------------------------------------------------------------------
#define BLAS_sdot FORTRAN_NAME( sdot, SDOT )
#define BLAS_ddot FORTRAN_NAME( ddot, DDOT )

// result return directly
#ifdef __cplusplus
extern "C"
#ifdef ACCELERATE_NEW_LAPACK
    // With ACCELERATE_NEW_LAPACK, take the BLAS prototypes from the
    // Accelerate framework header instead of declaring them here.
    #pragma message "include Accelerate.h"
    #include <stdlib.h>  // workaround
    // std::is_same is used by the sdot return-type static_assert in main().
    #include <type_traits>
    #include <Accelerate/Accelerate.h>
#else
    // Otherwise declare the Fortran-style ddot prototype ourselves;
    // the result is returned directly.
    #ifdef __cplusplus
    extern "C"
    #endif
    double BLAS_ddot(
        const blas_int* n,
        const double* x, const blas_int* incx,
        const double* y, const blas_int* incy );
#endif
double BLAS_ddot(
const blas_int* n,
const double* x, const blas_int* incx,
const double* y, const blas_int* incy );

//------------------------------------------------------------------------------
int main()
Expand All @@ -28,6 +35,17 @@ int main()
// If blas_int is 64-bit, BLAS can interpret it as 32-bit or 64-bit
// to see n = 5 and pass.
blas_int n[] = { 5, 5 }, ione = 1;

#ifdef ACCELERATE_NEW_LAPACK
// To verify this is new Accelerate, check the return type of sdot.
float sx[] = { 1, 2, 3, 4, 5 };
float sy[] = { 5, 4, 3, 2, 1 };
auto r = BLAS_sdot( n, sx, &ione, sy, &ione );
static_assert( std::is_same< float, decltype( r ) >::value,
"With new Accelerate, sdot returns float"
" -> this is old Accelerate" );
#endif

double x[] = { 1, 2, 3, 4, 5 };
double y[] = { 5, 4, 3, 2, 1 };
for (int i = 0; i < n[0]; ++i) {
Expand Down
6 changes: 6 additions & 0 deletions config/cblas.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,16 @@

#include <stdio.h>

//------------------------------------------------------------------------------
#if defined(BLAS_HAVE_MKL)
#pragma message "include mkl_cblas.h"
#if defined(BLAS_ILP64) && ! defined(MKL_ILP64)
#define MKL_ILP64
#endif
#include <mkl_cblas.h>

#elif defined(BLAS_HAVE_ESSL)
#pragma message "include essl.h"
#if defined(BLAS_ILP64) && ! defined(_ESV6464)
#define _ESV6464
#endif
Expand All @@ -22,8 +25,11 @@
// Unfortunately with Xcode 10.3 and GNU g++ 9.3, that doesn't compile.
// If we can find cblas.h, use it, otherwise use Accelerate.h.
#ifdef BLAS_HAVE_ACCELERATE_CBLAS_H
#pragma message "include cblas.h for Accelerate"
#include <cblas.h>
#else
#pragma message "include Accelerate.h"
#include <stdlib.h> // workaround
#include <Accelerate/Accelerate.h>
#endif
#else
Expand Down
Loading
Loading